// llvm-project/llvm/lib/Analysis/IPA/CallGraph.cpp

//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This interface is used to build and manipulate a call graph, which is a very
// useful tool for interprocedural optimization.
//
// Every function in a module is represented as a node in the call graph.  The
// callgraph node keeps track of which functions are called by the function
// corresponding to the node.
//
// A call graph will contain nodes where the function that they correspond to is
// null.  This 'external' node is used to represent control flow that is not
// represented (or analyzable) in the module.  As such, the external node will
// have edges to functions with the following properties:
//   1. All functions in the module without internal linkage, since they could
//      be called by functions outside of our analysis capability.
//   2. All functions whose address is used for something more than a direct
//      call, for example being stored into a memory location.  Since they may
//      be called by an unknown caller later, they must be tracked as such.
//
// Similarly, functions have a call edge to the external node iff:
//   1. The function is external, reflecting the fact that they could call
//      anything without internal linkage or that has its address taken.
//   2. The function contains an indirect function call.
//
// As an extension in the future, there may be multiple nodes with a null
// function.  These will be used when we can prove (through pointer analysis)
// that an indirect call site can call only a specific set of functions.
//
// Because of these properties, the CallGraph captures a conservative superset
// of all of the caller-callee relationships, which is useful for
// transformations.
//
// The CallGraph class also attempts to figure out what the root of the
// CallGraph is, which it currently does by looking for a function named 'main'.
// If no function named 'main' is found, the external node is used as the entry
// node, reflecting the fact that any function without internal linkage could
// be called into (which is common for libraries).
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Constants.h"     // Remove when ConstantPointerRefs are gone
#include "llvm/Module.h"
#include "llvm/iOther.h"
#include "llvm/iTerminators.h"
#include "llvm/Support/CallSite.h"
#include "Support/STLExtras.h"

namespace llvm {

// Static registration object: hands the CallGraph analysis to RegisterAnalysis
// under the name "callgraph", with the description "Call Graph Construction".
static RegisterAnalysis<CallGraph> X("callgraph", "Call Graph Construction");

/// KnownExternalFunctions - Names of external (library) functions that are
/// treated as never calling back into the program being analyzed.  The table
/// is consumed by ExternalFunctionDoesntCallIntoProgram, which copies and sorts
/// it before searching, so the order of entries here is purely for readability.
/// Each name should appear exactly once.
///
static const char * const KnownExternalFunctions[] = {
  // Low-level system calls
  "open",
  "read",
  "write",
  "writev",
  "lseek",
  "poll",
  "ioctl",

  // Low-level stdc library functions
  "abort", "exit",
  "getenv", "putenv",

  // Standard IO functions
  "printf",
  "sprintf",
  "fopen",
  "freopen",
  "fclose",
  "fwrite",
  "puts",
  "fputs",
  "getc",
  "ungetc",
  "putc",
  "putchar",
  "fread",
  "fileno",
  "ftell",
  "fflush",
  "fseek",
  "ferror",
  "feof",
  "fdopen",
  "__fxstat",
  "setbuf",
  "setbuffer",
  "setlinebuf",
  "setvbuf",

  // Memory functions
  "malloc",
  "free",
  "realloc",
  "calloc",
  "memalign",

  // String functions
  "atoi",
  "memmove",
  "memset",
  "memchr",
  "memcmp",
  "strchr",
  "strncpy",
  "strncmp",
  "strcmp",
  "strtok",
  "__strcoll_l",
  "__strxfrm_l",
  "__strftime_l",
  "__strtol_l",
  "__strtoul_l",
  "__strtoll_l",
  "__strtoull_l",
  "__strtof_l",
  "__strtod_l",
  "__strtold_l",
  "isalpha",

  // Math functions
  "exp", "sqrt", "cbrt", "hypot",
  "log", "log10", "pow",
  "sin", "cos", "tan",
  "asin", "acos", "atan", "atan2",

  // Locale functions
  "__uselocale",
  "__newlocale",
  "__freelocale",
  "__duplocale",
  "__nl_langinfo_l",

  // gettext functions used by libstdc++
  "gettext",
  "dgettext",
  "dcgettext",
  "textdomain",
  "bindtextdomain",

  // Random stuff
  "__assert_fail",
  "__errno_location",
  "clock", "time",
  "__main",
};


/// ExternalFunctionDoesntCallIntoProgram - Return true if the external function
/// called Name is _KNOWN_ never to call back into the program.  Without this
/// hack every function that calls something like "printf" would get an edge to
/// the external node and so be pulled into one huge SCC running through main.
///
/// A name qualifies if it is longer than, and starts with, the "__llvm_"
/// prefix, or if it appears in the KnownExternalFunctions table.
///
static bool ExternalFunctionDoesntCallIntoProgram(const std::string &Name) {
  // Sorted copy of the table, populated lazily by the first query.
  static std::vector<std::string> SortedNames;

  if (SortedNames.empty()) {
    const unsigned NumKnown =
      sizeof(KnownExternalFunctions) / sizeof(KnownExternalFunctions[0]);
    SortedNames.assign(KnownExternalFunctions,
                       KnownExternalFunctions + NumKnown);
    // Sorting once up front lets every later lookup be a binary search.
    std::sort(SortedNames.begin(), SortedNames.end());
  }

  // The bare prefix "__llvm_" (exactly seven characters) does not qualify;
  // only strictly longer names that begin with it do.
  if (Name.size() > 7 && Name.compare(0, 7, "__llvm_") == 0)
    return true;

  // Otherwise the name must be one of the known library functions.
  return std::binary_search(SortedNames.begin(), SortedNames.end(), Name);
}


// getNodeFor - Look up the call graph node for F in FunctionMap, allocating a
// fresh node on first request.  A null F is accepted; any non-null F must
// belong to the module currently being analyzed (checked by assertion).
//
CallGraphNode *CallGraph::getNodeFor(Function *F) {
  // operator[] creates a null slot for unseen keys; holding it by reference
  // lets the new node be stored without a second lookup.
  CallGraphNode *&Slot = FunctionMap[F];
  if (!Slot) {
    assert((!F || F->getParent() == Mod) && "Function not in current module!");
    Slot = new CallGraphNode(F);
  }
  return Slot;
}

// isOnlyADirectCall - Return true if CS wraps an actual instruction and F is
// not passed as any of that call site's arguments.  Return false if CS has no
// underlying instruction or if F shows up in the argument list.
// NOTE(review): callers pass a CallSite built from a user of F, so "not an
// argument" presumably means F is the callee -- confirm against CallSite.
//
static bool isOnlyADirectCall(Function *F, CallSite CS) {
  if (CS.getInstruction() == 0)
    return false;

  // F passed as data may be called from somewhere we cannot see.
  CallSite::arg_iterator Arg = CS.arg_begin();
  const CallSite::arg_iterator ArgEnd = CS.arg_end();
  while (Arg != ArgEnd) {
    if (*Arg == F)
      return false;
    ++Arg;
  }
  return true;
}

// addToCallGraph - Add a function to the call graph, and link the node to all
// of the functions that it calls.
//
void CallGraph::addToCallGraph(Function *F) {
  CallGraphNode *Node = getNodeFor(F);

  // If this function has external linkage, anything could call it...
  if (!F->hasInternalLinkage()) {
    ExternalNode->addCalledFunction(Node);

    // Found the entry point?
    if (F->getName() == "main") {
      if (Root)
        Root = ExternalNode;  // Found multiple external mains?  Don't pick one.
      else
        Root = Node;          // Found a main, keep track of it!
    }
  }

  // If this function is not defined in this translation unit, it could call
  // anything.
  if (F->isExternal() && !F->getIntrinsicID() &&
      !ExternalFunctionDoesntCallIntoProgram(F->getName()))
    Node->addCalledFunction(ExternalNode);

  // Loop over all of the users of the function... looking for callers...
  //
  bool isUsedExternally = false;
  for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I) {
    if (Instruction *Inst = dyn_cast<Instruction>(*I)) {
      if (isOnlyADirectCall(F, CallSite::get(Inst)))
        getNodeFor(Inst->getParent()->getParent())->addCalledFunction(Node);
      else
        isUsedExternally = true;
    } else if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(*I)) {
      // THIS IS A DISGUSTING HACK.  Brought to you by the power of
      // ConstantPointerRefs!
      for (Value::use_iterator I = CPR->use_begin(), E = CPR->use_end();
           I != E; ++I)
        if (Instruction *Inst = dyn_cast<Instruction>(*I)) {
          if (isOnlyADirectCall(F, CallSite::get(Inst)))
            getNodeFor(Inst->getParent()->getParent())->addCalledFunction(Node);
          else
            isUsedExternally = true;
        } else {
          isUsedExternally = true;
        }
    } else {                        // Can't classify the user!
      isUsedExternally = true;
    }
  }
  if (isUsedExternally)
    ExternalNode->addCalledFunction(Node);

  // Look for an indirect function call...
  for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II){
      CallSite CS = CallSite::get(II);
      if (CS.getInstruction() && !CS.getCalledFunction())
        Node->addCalledFunction(ExternalNode);
    }
}

// run - Rebuild the call graph from scratch for module M.  Any nodes left from
// an earlier run are destroyed first.  Always returns false.
//
bool CallGraph::run(Module &M) {
  destroy();

  Mod = &M;
  Root = 0;
  // The node keyed by the null function is the external node.
  ExternalNode = getNodeFor(0);

  // Give every function in the module a node and its edges.  addToCallGraph
  // also sets Root when it sees a non-internal function named "main".
  for (Module::iterator Fn = M.begin(), FnEnd = M.end(); Fn != FnEnd; ++Fn)
    addToCallGraph(Fn);

  // No main function was found: fall back to the external node as the root.
  if (!Root)
    Root = ExternalNode;

  return false;
}

void CallGraph::destroy() {
  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
       I != E; ++I)
    delete I->second;
  FunctionMap.clear();
}

static void WriteToOutput(const CallGraphNode *CGN, std::ostream &o) {
  if (CGN->getFunction())
    o << "Call graph node for function: '"
      << CGN->getFunction()->getName() <<"'\n";
  else
    o << "Call graph node <<null function: 0x" << CGN << ">>:\n";

  for (unsigned i = 0; i < CGN->size(); ++i)
    if ((*CGN)[i]->getFunction())
      o << "  Calls function '" << (*CGN)[i]->getFunction()->getName() << "'\n";
    else
      o << "  Calls external node\n";
  o << "\n";
}

void CallGraph::print(std::ostream &o, const Module *M) const {
  o << "CallGraph Root is: ";
  if (getRoot()->getFunction())
    o << getRoot()->getFunction()->getName() << "\n";
  else
    o << "<<null function: 0x" << getRoot() << ">>\n";

  for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
    WriteToOutput(I->second, o);
}


//===----------------------------------------------------------------------===//
// Implementations of public modification methods
//

// Functions to keep a call graph up to date with a function that has been
// modified
//
// addFunctionToModule - Not implemented.  Calling it trips the assertion below
// and then unconditionally calls abort(), so the process terminates even when
// assertions are compiled out.  The Meth parameter is never read.
//
void CallGraph::addFunctionToModule(Function *Meth) {
  assert(0 && "not implemented");
  abort();   // Still fatal when the assert above is disabled.
}

// removeFunctionFromModule - Unlink the function from this module, returning
// it.  Because this removes the function from the module, the call graph node
// is destroyed.  This is only valid if the function does not call any other
// functions (ie, there are no edges in its CGN).  The easiest way to do this
// is to dropAllReferences before calling this.
//
// NOTE(review): nothing here removes edges that other nodes may still hold to
// CGN; presumably the caller has already taken care of those -- confirm.
//
Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
  assert(CGN->CalledFunctions.empty() && "Cannot remove function from call "
         "graph if it references other functions!");

  // Grab the function first: the node is about to go away.
  Function *Removed = CGN->getFunction();

  FunctionMap.erase(Removed);   // Forget the node in the map...
  delete CGN;                   // ...and release it.

  // Detach the function from the module's list and hand it to the caller.
  Mod->getFunctionList().remove(Removed);
  return Removed;
}

// stub - Intentionally empty member function.
// NOTE(review): presumably exists only so that this file provides an
// out-of-line CallGraph symbol for other code to reference -- confirm against
// the declaration in CallGraph.h.
void CallGraph::stub() {}

} // End llvm namespace