forked from OSchip/llvm-project
Fold the ShrinkDemandedOps pass into the regular DAGCombiner pass,
which is faster, simpler, and less surprising. llvm-svn: 106263
This commit is contained in:
parent
0136309f5a
commit
b92156d5e4
|
@ -292,7 +292,6 @@ private:
|
|||
MachineBasicBlock *CodeGenAndEmitDAG(MachineBasicBlock *BB);
|
||||
void LowerArguments(const BasicBlock *BB);
|
||||
|
||||
void ShrinkDemandedOps();
|
||||
void ComputeLiveOutVRegInfo();
|
||||
|
||||
/// Create the scheduler. If a specific scheduler was specified
|
||||
|
|
|
@ -766,14 +766,12 @@ public:
|
|||
SelectionDAG &DAG;
|
||||
bool LegalTys;
|
||||
bool LegalOps;
|
||||
bool ShrinkOps;
|
||||
SDValue Old;
|
||||
SDValue New;
|
||||
|
||||
explicit TargetLoweringOpt(SelectionDAG &InDAG,
|
||||
bool LT, bool LO,
|
||||
bool Shrink = false) :
|
||||
DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {}
|
||||
bool LT, bool LO) :
|
||||
DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
|
||||
|
||||
bool LegalTypes() const { return LegalTys; }
|
||||
bool LegalOperations() const { return LegalOps; }
|
||||
|
|
|
@ -2028,7 +2028,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
|
|||
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
|
||||
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
|
||||
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
|
||||
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
|
||||
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
|
||||
//
|
||||
// do not sink logical op inside of a vector extend, since it may combine
|
||||
// into a vsetcc.
|
||||
|
@ -2038,7 +2038,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
|
|||
// Avoid infinite looping with PromoteIntBinOp.
|
||||
(N0.getOpcode() == ISD::ANY_EXTEND &&
|
||||
(!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
|
||||
(N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
|
||||
(N0.getOpcode() == ISD::TRUNCATE &&
|
||||
(!TLI.isZExtFree(VT, Op0VT) ||
|
||||
!TLI.isTruncateFree(Op0VT, VT)) &&
|
||||
TLI.isTypeLegal(Op0VT))) &&
|
||||
!VT.isVector() &&
|
||||
Op0VT == N1.getOperand(0).getValueType() &&
|
||||
(!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
|
||||
|
@ -2425,6 +2428,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
|
|||
if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
|
||||
return SDValue(Rot, 0);
|
||||
|
||||
// Simplify the operands using demanded-bits information.
|
||||
if (!VT.isVector() &&
|
||||
SimplifyDemandedBits(SDValue(N, 0)))
|
||||
return SDValue(N, 0);
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -3158,6 +3166,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
|
|||
return NewSRL;
|
||||
}
|
||||
|
||||
// Attempt to convert a srl of a load into a narrower zero-extending load.
|
||||
SDValue NarrowLoad = ReduceLoadWidth(N);
|
||||
if (NarrowLoad.getNode())
|
||||
return NarrowLoad;
|
||||
|
||||
// Here is a common situation. We want to optimize:
|
||||
//
|
||||
// %a = ...
|
||||
|
@ -3635,10 +3648,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
|
|||
|
||||
// fold (zext (truncate x)) -> (and x, mask)
|
||||
if (N0.getOpcode() == ISD::TRUNCATE &&
|
||||
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
|
||||
(!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
|
||||
N0.getValueType()) ||
|
||||
!TLI.isZExtFree(N0.getValueType(), VT))) {
|
||||
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
|
||||
SDValue Op = N0.getOperand(0);
|
||||
if (Op.getValueType().bitsLT(VT)) {
|
||||
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
|
||||
|
@ -4024,6 +4034,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
|
|||
/// extended, also fold the extension to form an extending load.
|
||||
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
|
||||
unsigned Opc = N->getOpcode();
|
||||
|
||||
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
|
@ -4040,6 +4051,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
|
|||
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
|
||||
if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
|
||||
return SDValue();
|
||||
} else if (Opc == ISD::SRL) {
|
||||
// Another special-case: SRL is basically zero-extending a narrower
|
||||
// value.
|
||||
ExtType = ISD::ZEXTLOAD;
|
||||
N0 = SDValue(N, 0);
|
||||
ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
|
||||
if (!N01) return SDValue();
|
||||
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
|
||||
VT.getSizeInBits() - N01->getZExtValue());
|
||||
}
|
||||
|
||||
unsigned EVTBits = ExtVT.getSizeInBits();
|
||||
|
@ -4243,8 +4263,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
|||
|
||||
// fold (truncate (load x)) -> (smaller load x)
|
||||
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
|
||||
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
|
||||
return ReduceLoadWidth(N);
|
||||
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
|
||||
SDValue Reduced = ReduceLoadWidth(N);
|
||||
if (Reduced.getNode())
|
||||
return Reduced;
|
||||
}
|
||||
|
||||
// Simplify the operands using demanded-bits information.
|
||||
if (!VT.isVector() &&
|
||||
SimplifyDemandedBits(SDValue(N, 0)))
|
||||
return SDValue(N, 0);
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
|
|
@ -2474,10 +2474,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
|
|||
VT.getVectorNumElements() ==
|
||||
Operand.getValueType().getVectorNumElements()) &&
|
||||
"Vector element count mismatch!");
|
||||
|
||||
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
|
||||
OpOpcode == ISD::ANY_EXTEND)
|
||||
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
|
||||
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
|
||||
|
||||
// (ext (trunc x)) -> x
|
||||
if (OpOpcode == ISD::TRUNCATE) {
|
||||
SDValue OpOp = Operand.getNode()->getOperand(0);
|
||||
if (OpOp.getValueType() == VT)
|
||||
return OpOp;
|
||||
}
|
||||
break;
|
||||
case ISD::TRUNCATE:
|
||||
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
|
||||
|
|
|
@ -372,102 +372,6 @@ public:
|
|||
};
|
||||
}
|
||||
|
||||
/// TrivialTruncElim - Eliminate some trivial nops that can result from
|
||||
/// ShrinkDemandedOps: (trunc (ext n)) -> n.
|
||||
static bool TrivialTruncElim(SDValue Op,
|
||||
TargetLowering::TargetLoweringOpt &TLO) {
|
||||
SDValue N0 = Op.getOperand(0);
|
||||
EVT VT = Op.getValueType();
|
||||
if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
|
||||
N0.getOpcode() == ISD::SIGN_EXTEND ||
|
||||
N0.getOpcode() == ISD::ANY_EXTEND) &&
|
||||
N0.getOperand(0).getValueType() == VT) {
|
||||
return TLO.CombineTo(Op, N0.getOperand(0));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// ShrinkDemandedOps - A late transformation pass that shrinks expressions
|
||||
/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
|
||||
/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
|
||||
void SelectionDAGISel::ShrinkDemandedOps() {
|
||||
SmallVector<SDNode*, 128> Worklist;
|
||||
SmallPtrSet<SDNode*, 128> InWorklist;
|
||||
|
||||
// Add all the dag nodes to the worklist.
|
||||
Worklist.reserve(CurDAG->allnodes_size());
|
||||
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
|
||||
E = CurDAG->allnodes_end(); I != E; ++I) {
|
||||
Worklist.push_back(I);
|
||||
InWorklist.insert(I);
|
||||
}
|
||||
|
||||
TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true);
|
||||
while (!Worklist.empty()) {
|
||||
SDNode *N = Worklist.pop_back_val();
|
||||
InWorklist.erase(N);
|
||||
|
||||
if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
|
||||
// Deleting this node may make its operands dead, add them to the worklist
|
||||
// if they aren't already there.
|
||||
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
|
||||
if (InWorklist.insert(N->getOperand(i).getNode()))
|
||||
Worklist.push_back(N->getOperand(i).getNode());
|
||||
|
||||
CurDAG->DeleteNode(N);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Run ShrinkDemandedOp on scalar binary operations.
|
||||
if (N->getNumValues() != 1 ||
|
||||
!N->getValueType(0).isSimple() || !N->getValueType(0).isInteger())
|
||||
continue;
|
||||
|
||||
unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
|
||||
APInt Demanded = APInt::getAllOnesValue(BitWidth);
|
||||
APInt KnownZero, KnownOne;
|
||||
if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
|
||||
KnownZero, KnownOne, TLO) &&
|
||||
(N->getOpcode() != ISD::TRUNCATE ||
|
||||
!TrivialTruncElim(SDValue(N, 0), TLO)))
|
||||
continue;
|
||||
|
||||
// Revisit the node.
|
||||
assert(!InWorklist.count(N) && "Already in worklist");
|
||||
Worklist.push_back(N);
|
||||
InWorklist.insert(N);
|
||||
|
||||
// Replace the old value with the new one.
|
||||
DEBUG(errs() << "\nShrinkDemandedOps replacing ";
|
||||
TLO.Old.getNode()->dump(CurDAG);
|
||||
errs() << "\nWith: ";
|
||||
TLO.New.getNode()->dump(CurDAG);
|
||||
errs() << '\n');
|
||||
|
||||
if (InWorklist.insert(TLO.New.getNode()))
|
||||
Worklist.push_back(TLO.New.getNode());
|
||||
|
||||
SDOPsWorkListRemover DeadNodes(Worklist, InWorklist);
|
||||
CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
|
||||
|
||||
if (!TLO.Old.getNode()->use_empty()) continue;
|
||||
|
||||
for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
|
||||
i != e; ++i) {
|
||||
SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
|
||||
if (OpNode->hasOneUse()) {
|
||||
// Add OpNode to the end of the list to revisit.
|
||||
DeadNodes.RemoveFromWorklist(OpNode);
|
||||
Worklist.push_back(OpNode);
|
||||
InWorklist.insert(OpNode);
|
||||
}
|
||||
}
|
||||
|
||||
DeadNodes.RemoveFromWorklist(TLO.Old.getNode());
|
||||
CurDAG->DeleteNode(TLO.Old.getNode());
|
||||
}
|
||||
}
|
||||
|
||||
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
|
||||
SmallPtrSet<SDNode*, 128> VisitedNodes;
|
||||
SmallVector<SDNode*, 128> Worklist;
|
||||
|
@ -636,10 +540,8 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
|
|||
DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
|
||||
DEBUG(CurDAG->dump());
|
||||
|
||||
if (OptLevel != CodeGenOpt::None) {
|
||||
ShrinkDemandedOps();
|
||||
if (OptLevel != CodeGenOpt::None)
|
||||
ComputeLiveOutVRegInfo();
|
||||
}
|
||||
|
||||
if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
|
||||
|
||||
|
|
|
@ -1042,7 +1042,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
|||
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
|
||||
return true;
|
||||
// If the operation can be done in a smaller type, do so.
|
||||
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
||||
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
||||
return true;
|
||||
|
||||
// Output known-1 bits are only known if set in both the LHS & RHS.
|
||||
|
@ -1076,7 +1076,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
|||
if (TLO.ShrinkDemandedConstant(Op, NewMask))
|
||||
return true;
|
||||
// If the operation can be done in a smaller type, do so.
|
||||
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
||||
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
||||
return true;
|
||||
|
||||
// Output known-0 bits are only known if clear in both the LHS & RHS.
|
||||
|
@ -1101,7 +1101,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
|||
if ((KnownZero2 & NewMask) == NewMask)
|
||||
return TLO.CombineTo(Op, Op.getOperand(1));
|
||||
// If the operation can be done in a smaller type, do so.
|
||||
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
||||
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
||||
return true;
|
||||
|
||||
// If all of the unknown bits are known to be zero on one side or the other
|
||||
|
@ -1548,7 +1548,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
|||
KnownOne2, TLO, Depth+1))
|
||||
return true;
|
||||
// See if the operation should be performed at a smaller bit width.
|
||||
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
||||
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
|
||||
return true;
|
||||
}
|
||||
// FALL THROUGH
|
||||
|
|
|
@ -9641,8 +9641,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
|||
if (ShAmt1.getOpcode() == ISD::SUB) {
|
||||
SDValue Sum = ShAmt1.getOperand(0);
|
||||
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
|
||||
if (SumC->getSExtValue() == Bits &&
|
||||
ShAmt1.getOperand(1) == ShAmt0)
|
||||
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
|
||||
if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
|
||||
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
|
||||
if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
|
||||
return DAG.getNode(Opc, DL, VT,
|
||||
Op0, Op1,
|
||||
DAG.getNode(ISD::TRUNCATE, DL,
|
||||
|
|
|
@ -21,3 +21,8 @@ define i32* @test3(i32* %P, i32 %X) {
|
|||
ret i32* %P2
|
||||
}
|
||||
|
||||
define fastcc i32 @test4(i32* %d) nounwind {
|
||||
%tmp4 = load i32* %d
|
||||
%tmp512 = lshr i32 %tmp4, 24
|
||||
ret i32 %tmp512
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ entry:
|
|||
; X64: movw %si, 2(%rdi)
|
||||
|
||||
; X32: test4:
|
||||
; X32: movzwl 8(%esp), %eax
|
||||
; X32: movl 8(%esp), %eax
|
||||
; X32: movw %ax, 2(%{{.*}})
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue