Fix inline cost predictions with SCIENCE.

After running a batch of measurements, it is clear that the inliner metrics need some adjustments: Own argument bonus: 20 -> 5 Outgoing argument penalty: 0 -> 5 Alloca bonus: 10 -> 5 Constant instr bonus: 7 -> 5 Dead successor bonus: 40 -> 5*(avg instrs/block) The new cost metrics are generaly 25 points higher than before, so we may need to move thresholds. With this change, InlineConstants::CallPenalty becomes a political correction: if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) NumInsts += InlineConstants::CallPenalty + CS.arg_size(); The code size is accurately modelled by CS.arg_size(). CallPenalty is added because calls tend to take a long time, so it may not be worth it to inline a function with lots of calls. All of the political corrections are in the InlineConstants namespace: IndirectCallBonus, CallPenalty, LastCallToStaticBonus, ColdccPenalty, NoreturnPenalty. llvm-svn: 94615
2010-01-26 23:21:56 +00:00 · 2010-01-26 23:21:56 +00:00 · 0234628284
parent c7b91156e4
commit 0234628284
2 changed files with 34 additions and 29 deletions
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@ -64,7 +64,9 @@ namespace llvm {

  namespace InlineConstants {
    // Various magic constants used to adjust heuristics.
-    const int CallPenalty = 5;
+    const int InstrCost = 5;
+    const int IndirectCallBonus = 500;
+    const int CallPenalty = 5;  // In instrs, so multiply by InstrCost.
    const int LastCallToStaticBonus = -15000;
    const int ColdccPenalty = 2000;
    const int NoreturnPenalty = 10000;
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@ -25,18 +25,23 @@ unsigned InlineCostAnalyzer::FunctionInfo::
         CountCodeReductionForConstant(Value *V) {
  unsigned Reduction = 0;
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
-    if (isa<BranchInst>(*UI))
-      Reduction += 40;          // Eliminating a conditional branch is a big win
-    else if (SwitchInst *SI = dyn_cast<SwitchInst>(*UI))
-      // Eliminating a switch is a big win, proportional to the number of edges
-      // deleted.
-      Reduction += (SI->getNumSuccessors()-1) * 40;
-    else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+    if (isa<BranchInst>(*UI) || isa<SwitchInst>(*UI)) {
+      // We will be able to eliminate all but one of the successors.
+      const TerminatorInst &TI = cast<TerminatorInst>(**UI);
+      const unsigned NumSucc = TI.getNumSuccessors();
+      unsigned Instrs = 0;
+      for (unsigned I = 0; I != NumSucc; ++I)
+        Instrs += TI.getSuccessor(I)->size();
+      // We don't know which blocks will be eliminated, so use the average size.
+      Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
+    } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
      // Turning an indirect call into a direct call is a BIG win
-      Reduction += CI->getCalledValue() == V ? 500 : 0;
+      if (CI->getCalledValue() == V)
+        Reduction += InlineConstants::IndirectCallBonus;
    } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
      // Turning an indirect call into a direct call is a BIG win
-      Reduction += II->getCalledValue() == V ? 500 : 0;
+      if (II->getCalledValue() == V)
+        Reduction += InlineConstants::IndirectCallBonus;
    } else {
      // Figure out if this instruction will be removed due to simple constant
      // propagation.
@ -62,7 +67,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::

      if (AllOperandsConstant) {
        // We will get to remove this instruction...
-        Reduction += 7;
+        Reduction += InlineConstants::InstrCost;

        // And any other instructions that use it which become constants
        // themselves.
@ -84,11 +89,14 @@ unsigned InlineCostAnalyzer::FunctionInfo::
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
    Instruction *I = cast<Instruction>(*UI);
    if (isa<LoadInst>(I) || isa<StoreInst>(I))
-      Reduction += 10;
+      Reduction += InlineConstants::InstrCost;
    else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      // If the GEP has variable indices, we won't be able to do much with it.
      if (GEP->hasAllConstantIndices())
        Reduction += CountCodeReductionForAlloca(GEP);
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+      // Track pointer through bitcasts.
+      Reduction += CountCodeReductionForAlloca(BCI);
    } else {
      // If there is some other strange instruction, we're not going to be able
      // to do much if we inline this.
@ -155,10 +163,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
            (F->getName() == "setjmp" || F->getName() == "_setjmp"))
          NeverInline = true;

-      // Calls often compile into many machine instructions.  Bump up their
-      // cost to reflect this.
+      // Each argument to a call takes on average one instruction to set up.
+      // Add an extra penalty because calls can take a long time to execute.
      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction()))
-        NumInsts += InlineConstants::CallPenalty;
+        NumInsts += InlineConstants::CallPenalty + CS.arg_size();
    }
    
    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@ -316,20 +324,15 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I, ++ArgNo) {
    // Each argument passed in has a cost at both the caller and the callee
-    // sides.  This favors functions that take many arguments over functions
-    // that take few arguments.
-    InlineCost -= 20;
-    
-    // If this is a function being passed in, it is very likely that we will be
-    // able to turn an indirect function call into a direct function call.
-    if (isa<Function>(I))
-      InlineCost -= 100;
+    // sides.  Measurements show that each argument costs about the same as an
+    // instruction.
+    InlineCost -= InlineConstants::InstrCost;

    // If an alloca is passed in, inlining this function is likely to allow
    // significant future optimization possibilities (like scalar promotion, and
    // scalarization), so encourage the inlining of the function.
    //
-    else if (isa<AllocaInst>(I)) {
+    if (isa<AllocaInst>(I)) {
      if (ArgNo < CalleeFI.ArgumentWeights.size())
        InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight;

@ -351,7 +354,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
  InlineCost += Caller->size()/15;
  
  // Look at the size of the callee. Each instruction counts as 5.
-  InlineCost += CalleeFI.Metrics.NumInsts*5;
+  InlineCost += CalleeFI.Metrics.NumInsts*InlineConstants::InstrCost;

  return llvm::InlineCost::get(InlineCost);
 }