Fold the ShrinkDemandedOps pass into the regular DAGCombiner pass,

which is faster, simpler, and less surprising.

llvm-svn: 106263
This commit is contained in:
Dan Gohman 2010-06-18 01:05:21 +00:00
parent 0136309f5a
commit b92156d5e4
9 changed files with 62 additions and 119 deletions

View File

@ -292,7 +292,6 @@ private:
MachineBasicBlock *CodeGenAndEmitDAG(MachineBasicBlock *BB);
void LowerArguments(const BasicBlock *BB);
void ShrinkDemandedOps();
void ComputeLiveOutVRegInfo();
/// Create the scheduler. If a specific scheduler was specified

View File

@ -766,14 +766,12 @@ public:
SelectionDAG &DAG;
bool LegalTys;
bool LegalOps;
bool ShrinkOps;
SDValue Old;
SDValue New;
explicit TargetLoweringOpt(SelectionDAG &InDAG,
bool LT, bool LO,
bool Shrink = false) :
DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {}
bool LT, bool LO) :
DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
bool LegalTypes() const { return LegalTys; }
bool LegalOperations() const { return LegalOps; }

View File

@ -2028,7 +2028,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
//
// do not sink logical op inside of a vector extend, since it may combine
// into a vsetcc.
@ -2038,7 +2038,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// Avoid infinite looping with PromoteIntBinOp.
(N0.getOpcode() == ISD::ANY_EXTEND &&
(!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
(N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
(N0.getOpcode() == ISD::TRUNCATE &&
(!TLI.isZExtFree(VT, Op0VT) ||
!TLI.isTruncateFree(Op0VT, VT)) &&
TLI.isTypeLegal(Op0VT))) &&
!VT.isVector() &&
Op0VT == N1.getOperand(0).getValueType() &&
(!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
@ -2425,6 +2428,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
return SDValue(Rot, 0);
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}
@ -3158,6 +3166,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSRL;
}
// Attempt to convert a srl of a load into a narrower zero-extending load.
SDValue NarrowLoad = ReduceLoadWidth(N);
if (NarrowLoad.getNode())
return NarrowLoad;
// Here is a common situation. We want to optimize:
//
// %a = ...
@ -3635,10 +3648,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
(!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
N0.getValueType()) ||
!TLI.isZExtFree(N0.getValueType(), VT))) {
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@ -4024,6 +4034,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
/// extended, also fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@ -4040,6 +4051,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
return SDValue();
} else if (Opc == ISD::SRL) {
// Another special-case: SRL is basically zero-extending a narrower
// value.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!N01) return SDValue();
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() - N01->getZExtValue());
}
unsigned EVTBits = ExtVT.getSizeInBits();
@ -4243,8 +4263,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
return ReduceLoadWidth(N);
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
SDValue Reduced = ReduceLoadWidth(N);
if (Reduced.getNode())
return Reduced;
}
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();
}

View File

@ -2474,10 +2474,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
// (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
SDValue OpOp = Operand.getNode()->getOperand(0);
if (OpOp.getValueType() == VT)
return OpOp;
}
break;
case ISD::TRUNCATE:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&

View File

@ -372,102 +372,6 @@ public:
};
}
/// TrivialTruncElim - Eliminate some trivial nops that can result from
/// ShrinkDemandedOps: (trunc (ext n)) -> n.
///
/// Op's first operand is inspected; if it is a ZERO_EXTEND, SIGN_EXTEND or
/// ANY_EXTEND whose own source operand already has Op's value type, Op is
/// replaced by that source through TLO.CombineTo and the result of CombineTo
/// is returned. Returns false when the pattern does not match.
///
/// This function does not itself check that Op is a TRUNCATE; the call site
/// visible in ShrinkDemandedOps tests the opcode before calling it.
static bool TrivialTruncElim(SDValue Op,
TargetLowering::TargetLoweringOpt &TLO) {
SDValue N0 = Op.getOperand(0);
EVT VT = Op.getValueType();
// The extend must have widened from exactly the type Op produces; only then
// does the extend/Op pair cancel out to the extend's source value.
if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND) &&
N0.getOperand(0).getValueType() == VT) {
return TLO.CombineTo(Op, N0.getOperand(0));
}
return false;
}
/// ShrinkDemandedOps - A late transformation pass that shrink expressions
/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
///
/// The pass is worklist driven: every node of CurDAG is queued once, dead
/// nodes are deleted as they are popped, and each remaining single-result
/// integer node is handed to TLI.SimplifyDemandedBits with all result bits
/// demanded. When a replacement is recorded in TLO, uses of the old value
/// are rewritten to the new one and the affected nodes are queued again.
void SelectionDAGISel::ShrinkDemandedOps() {
// Worklist holds the nodes still to visit; InWorklist mirrors its contents
// so membership can be tested before pushing a node a second time.
SmallVector<SDNode*, 128> Worklist;
SmallPtrSet<SDNode*, 128> InWorklist;
// Add all the dag nodes to the worklist.
Worklist.reserve(CurDAG->allnodes_size());
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ++I) {
Worklist.push_back(I);
InWorklist.insert(I);
}
// The three flags are passed positionally as (LT, LO, Shrink) in the
// TargetLoweringOpt constructor signature that takes a Shrink argument:
// types legal, operations legal, operation shrinking enabled.
TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true);
while (!Worklist.empty()) {
SDNode *N = Worklist.pop_back_val();
InWorklist.erase(N);
// A node with no uses is dead unless it is the DAG root.
if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
// Deleting this node may make its operands dead, add them to the worklist
// if they aren't already there.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
if (InWorklist.insert(N->getOperand(i).getNode()))
Worklist.push_back(N->getOperand(i).getNode());
CurDAG->DeleteNode(N);
continue;
}
// Run ShrinkDemandedOp on scalar binary operations.
// NOTE(review): the filter below only checks that the node has exactly one
// result and that the result type is a simple integer type; it does not
// itself test for "scalar" or "binary".
if (N->getNumValues() != 1 ||
!N->getValueType(0).isSimple() || !N->getValueType(0).isInteger())
continue;
// Demand every bit of the result.
unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
APInt Demanded = APInt::getAllOnesValue(BitWidth);
APInt KnownZero, KnownOne;
// Skip the node when SimplifyDemandedBits made no change and, for a
// TRUNCATE, TrivialTruncElim made no change either. TrivialTruncElim is
// only attempted if SimplifyDemandedBits returned false.
if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
KnownZero, KnownOne, TLO) &&
(N->getOpcode() != ISD::TRUNCATE ||
!TrivialTruncElim(SDValue(N, 0), TLO)))
continue;
// Revisit the node.
assert(!InWorklist.count(N) && "Already in worklist");
Worklist.push_back(N);
InWorklist.insert(N);
// Replace the old value with the new one.
DEBUG(errs() << "\nShrinkDemandedOps replacing ";
TLO.Old.getNode()->dump(CurDAG);
errs() << "\nWith: ";
TLO.New.getNode()->dump(CurDAG);
errs() << '\n');
// Queue the replacement node so it is examined as well.
if (InWorklist.insert(TLO.New.getNode()))
Worklist.push_back(TLO.New.getNode());
// NOTE(review): SDOPsWorkListRemover is defined outside this function;
// presumably it keeps Worklist/InWorklist in sync when the replacement
// below deletes nodes -- confirm against its definition.
SDOPsWorkListRemover DeadNodes(Worklist, InWorklist);
CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
// If the old node still has users it must stay; nothing more to clean up.
if (!TLO.Old.getNode()->use_empty()) continue;
// The old node is now unused and is deleted below. An operand with a single
// use is used only by the old node, so it is about to become dead too.
for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
i != e; ++i) {
SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
if (OpNode->hasOneUse()) {
// Add OpNode to the end of the list to revisit.
DeadNodes.RemoveFromWorklist(OpNode);
Worklist.push_back(OpNode);
InWorklist.insert(OpNode);
}
}
// Drop the old node from the worklist before deleting it so that no
// pointer to the deleted node is left queued.
DeadNodes.RemoveFromWorklist(TLO.Old.getNode());
CurDAG->DeleteNode(TLO.Old.getNode());
}
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
@ -636,10 +540,8 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
DEBUG(CurDAG->dump());
if (OptLevel != CodeGenOpt::None) {
ShrinkDemandedOps();
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
}
if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);

View File

@ -1042,7 +1042,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
return true;
// If the operation can be done in a smaller type, do so.
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@ -1076,7 +1076,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
// If the operation can be done in a smaller type, do so.
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@ -1101,7 +1101,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((KnownZero2 & NewMask) == NewMask)
return TLO.CombineTo(Op, Op.getOperand(1));
// If the operation can be done in a smaller type, do so.
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// If all of the unknown bits are known to be zero on one side or the other
@ -1548,7 +1548,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne2, TLO, Depth+1))
return true;
// See if the operation should be performed at a smaller bit width.
if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
}
// FALL THROUGH

View File

@ -9641,8 +9641,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
if (SumC->getSExtValue() == Bits &&
ShAmt1.getOperand(1) == ShAmt0)
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
DAG.getNode(ISD::TRUNCATE, DL,

View File

@ -21,3 +21,8 @@ define i32* @test3(i32* %P, i32 %X) {
ret i32* %P2
}
define fastcc i32 @test4(i32* %d) nounwind {
%tmp4 = load i32* %d
%tmp512 = lshr i32 %tmp4, 24
ret i32 %tmp512
}

View File

@ -67,7 +67,7 @@ entry:
; X64: movw %si, 2(%rdi)
; X32: test4:
; X32: movzwl 8(%esp), %eax
; X32: movl 8(%esp), %eax
; X32: movw %ax, 2(%{{.*}})
}