2014-03-04 04:06:11 +08:00
|
|
|
//===- AddDiscriminators.cpp - Insert DWARF path discriminators -----------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file adds DWARF discriminators to the IR. Path discriminators are
|
|
|
|
// used to decide what CFG path was taken inside sub-graphs whose instructions
|
|
|
|
// share the same line and column number information.
|
|
|
|
//
|
|
|
|
// The main user of this is the sample profiler. Instruction samples are
|
|
|
|
// mapped to line number information. Since a single line may be spread
|
|
|
|
// out over several basic blocks, discriminators add more precise location
|
|
|
|
// for the samples.
|
|
|
|
//
|
|
|
|
// For example,
|
|
|
|
//
|
|
|
|
// 1 #define ASSERT(P)
|
|
|
|
// 2 if (!(P))
|
|
|
|
// 3 abort()
|
|
|
|
// ...
|
|
|
|
// 100 while (true) {
|
|
|
|
// 101 ASSERT (sum < 0);
|
|
|
|
// 102 ...
|
|
|
|
// 130 }
|
|
|
|
//
|
|
|
|
// when converted to IR, this snippet looks something like:
|
|
|
|
//
|
|
|
|
// while.body: ; preds = %entry, %if.end
|
|
|
|
// %0 = load i32* %sum, align 4, !dbg !15
|
|
|
|
// %cmp = icmp slt i32 %0, 0, !dbg !15
|
|
|
|
// br i1 %cmp, label %if.end, label %if.then, !dbg !15
|
|
|
|
//
|
|
|
|
// if.then: ; preds = %while.body
|
|
|
|
// call void @abort(), !dbg !15
|
|
|
|
// br label %if.end, !dbg !15
|
|
|
|
//
|
|
|
|
// Notice that all the instructions in blocks 'while.body' and 'if.then'
|
|
|
|
// have exactly the same debug information. When this program is sampled
|
|
|
|
// at runtime, the profiler will assume that all these instructions are
|
|
|
|
// equally frequent. This, in turn, will consider the edge while.body->if.then
|
|
|
|
// to be frequently taken (which is incorrect).
|
|
|
|
//
|
|
|
|
// By adding a discriminator value to the instructions in block 'if.then',
|
|
|
|
// we can distinguish instructions at line 101 with discriminator 0 from
|
|
|
|
// the instructions at line 101 with discriminator 1.
|
|
|
|
//
|
|
|
|
// For more details about DWARF discriminators, please visit
|
|
|
|
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-06-16 05:51:30 +08:00
|
|
|
#include "llvm/Transforms/Utils/AddDiscriminators.h"
|
2015-11-20 03:53:05 +08:00
|
|
|
#include "llvm/ADT/DenseMap.h"
|
2016-04-15 02:37:18 +08:00
|
|
|
#include "llvm/ADT/DenseSet.h"
|
2014-03-04 04:06:11 +08:00
|
|
|
#include "llvm/IR/BasicBlock.h"
|
2014-03-04 18:07:28 +08:00
|
|
|
#include "llvm/IR/Constants.h"
|
2014-03-06 08:22:06 +08:00
|
|
|
#include "llvm/IR/DIBuilder.h"
|
2014-03-06 08:46:21 +08:00
|
|
|
#include "llvm/IR/DebugInfo.h"
|
2014-03-04 04:06:11 +08:00
|
|
|
#include "llvm/IR/Instructions.h"
|
2015-11-16 18:40:38 +08:00
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
2014-03-04 04:06:11 +08:00
|
|
|
#include "llvm/IR/LLVMContext.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
|
|
|
#include "llvm/Pass.h"
|
|
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2015-11-20 03:53:05 +08:00
|
|
|
#include "llvm/Transforms/Scalar.h"
|
2014-03-04 04:06:11 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
2014-04-22 10:55:47 +08:00
|
|
|
#define DEBUG_TYPE "add-discriminators"
|
|
|
|
|
2014-03-04 04:06:11 +08:00
|
|
|
namespace {
|
2016-06-16 05:51:30 +08:00
|
|
|
// The legacy pass of AddDiscriminators.
|
|
|
|
struct AddDiscriminatorsLegacyPass : public FunctionPass {
|
2015-10-30 05:25:33 +08:00
|
|
|
static char ID; // Pass identification, replacement for typeid
|
2016-06-16 05:51:30 +08:00
|
|
|
AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
|
|
|
|
initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
|
2015-10-30 05:25:33 +08:00
|
|
|
}
|
2014-03-04 04:06:11 +08:00
|
|
|
|
2015-10-30 05:25:33 +08:00
|
|
|
bool runOnFunction(Function &F) override;
|
|
|
|
};
|
2016-06-16 05:51:30 +08:00
|
|
|
|
2016-01-27 02:48:36 +08:00
|
|
|
} // end anonymous namespace
|
2014-03-04 04:06:11 +08:00
|
|
|
|
2016-06-16 05:51:30 +08:00
|
|
|
char AddDiscriminatorsLegacyPass::ID = 0;
|
|
|
|
INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
|
2014-03-04 04:06:11 +08:00
|
|
|
"Add DWARF path discriminators", false, false)
|
2016-06-16 05:51:30 +08:00
|
|
|
INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
|
2014-03-04 04:06:11 +08:00
|
|
|
"Add DWARF path discriminators", false, false)
|
|
|
|
|
|
|
|
// Command line option to disable discriminator generation even in the
|
|
|
|
// presence of debug information. This is only needed when debugging
|
|
|
|
// debug info generation issues.
|
2015-10-30 05:25:33 +08:00
|
|
|
static cl::opt<bool> NoDiscriminators(
|
|
|
|
"no-discriminators", cl::init(false),
|
|
|
|
cl::desc("Disable generation of discriminator information."));
|
2014-03-04 04:06:11 +08:00
|
|
|
|
2016-06-16 05:51:30 +08:00
|
|
|
// Create the legacy AddDiscriminatorsPass.
|
2014-03-04 04:06:11 +08:00
|
|
|
FunctionPass *llvm::createAddDiscriminatorsPass() {
|
2016-06-16 05:51:30 +08:00
|
|
|
return new AddDiscriminatorsLegacyPass();
|
2014-03-04 04:06:11 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Assign DWARF discriminators.
|
|
|
|
///
|
|
|
|
/// To assign discriminators, we examine the boundaries of every
|
|
|
|
/// basic block and its successors. Suppose there is a basic block B1
|
|
|
|
/// with successor B2. The last instruction I1 in B1 and the first
|
|
|
|
/// instruction I2 in B2 are located at the same file and line number.
|
|
|
|
/// This situation is illustrated in the following code snippet:
|
|
|
|
///
|
|
|
|
/// if (i < 10) x = i;
|
|
|
|
///
|
|
|
|
/// entry:
|
|
|
|
/// br i1 %cmp, label %if.then, label %if.end, !dbg !10
|
|
|
|
/// if.then:
|
|
|
|
/// %1 = load i32* %i.addr, align 4, !dbg !10
|
|
|
|
/// store i32 %1, i32* %x, align 4, !dbg !10
|
|
|
|
/// br label %if.end, !dbg !10
|
|
|
|
/// if.end:
|
|
|
|
/// ret void, !dbg !12
|
|
|
|
///
|
|
|
|
/// Notice how the branch instruction in block 'entry' and all the
|
|
|
|
/// instructions in block 'if.then' have the exact same debug location
|
|
|
|
/// information (!dbg !10).
|
|
|
|
///
|
|
|
|
/// To distinguish instructions in block 'entry' from instructions in
|
|
|
|
/// block 'if.then', we generate a new lexical block for all the
|
|
|
|
/// instructions in block 'if.then' that share the same file and line
|
|
|
|
/// location with the last instruction of block 'entry'.
|
|
|
|
///
|
|
|
|
/// This new lexical block will have the same location information as
|
|
|
|
/// the previous one, but with a new DWARF discriminator value.
|
|
|
|
///
|
|
|
|
/// One of the main uses of this discriminator value is in runtime
|
|
|
|
/// sample profilers. It allows the profiler to distinguish instructions
|
|
|
|
/// at location !dbg !10 that execute on different basic blocks. This is
|
|
|
|
/// important because while the predicate 'if (i < 10)' may have been
|
|
|
|
/// executed millions of times, the assignment 'x = i' may have only
|
|
|
|
/// executed a handful of times (meaning that the entry->if.then edge is
|
|
|
|
/// seldom taken).
|
|
|
|
///
|
|
|
|
/// If we did not have discriminator information, the profiler would
|
|
|
|
/// assign the same weight to both blocks 'entry' and 'if.then', which
|
|
|
|
/// in turn will make it conclude that the entry->if.then edge is very
|
|
|
|
/// hot.
|
|
|
|
///
|
|
|
|
/// To decide where to create new discriminator values, this function
|
|
|
|
/// traverses the CFG and examines instructions at basic block boundaries.
|
|
|
|
/// If the last instruction I1 of a block B1 is at the same file and line
|
|
|
|
/// location as instruction I2 of successor B2, then it creates a new
|
|
|
|
/// lexical block for I2 and all the instructions in B2 that share the same
|
|
|
|
/// file and line location as I2. This new lexical block will have a
|
|
|
|
/// different discriminator number than I1.
|
2016-06-16 06:20:56 +08:00
|
|
|
static bool addDiscriminators(Function &F) {
|
2014-03-04 04:06:11 +08:00
|
|
|
// If the function has debug information, but the user has disabled
|
|
|
|
// discriminators, do nothing.
|
2014-04-18 06:33:50 +08:00
|
|
|
// Simlarly, if the function has no debug info, do nothing.
|
2016-10-07 23:21:31 +08:00
|
|
|
if (NoDiscriminators || !F.getSubprogram())
|
2014-04-18 06:33:50 +08:00
|
|
|
return false;
|
2014-03-04 04:06:11 +08:00
|
|
|
|
|
|
|
bool Changed = false;
|
|
|
|
Module *M = F.getParent();
|
|
|
|
LLVMContext &Ctx = M->getContext();
|
IR: Split Metadata from Value
Split `Metadata` away from the `Value` class hierarchy, as part of
PR21532. Assembly and bitcode changes are in the wings, but this is the
bulk of the change for the IR C++ API.
I have a follow-up patch prepared for `clang`. If this breaks other
sub-projects, I apologize in advance :(. Help me compile it on Darwin
I'll try to fix it. FWIW, the errors should be easy to fix, so it may
be simpler to just fix it yourself.
This breaks the build for all metadata-related code that's out-of-tree.
Rest assured the transition is mechanical and the compiler should catch
almost all of the problems.
Here's a quick guide for updating your code:
- `Metadata` is the root of a class hierarchy with three main classes:
`MDNode`, `MDString`, and `ValueAsMetadata`. It is distinct from
the `Value` class hierarchy. It is typeless -- i.e., instances do
*not* have a `Type`.
- `MDNode`'s operands are all `Metadata *` (instead of `Value *`).
- `TrackingVH<MDNode>` and `WeakVH` referring to metadata can be
replaced with `TrackingMDNodeRef` and `TrackingMDRef`, respectively.
If you're referring solely to resolved `MDNode`s -- post graph
construction -- just use `MDNode*`.
- `MDNode` (and the rest of `Metadata`) have only limited support for
`replaceAllUsesWith()`.
As long as an `MDNode` is pointing at a forward declaration -- the
result of `MDNode::getTemporary()` -- it maintains a side map of its
uses and can RAUW itself. Once the forward declarations are fully
resolved RAUW support is dropped on the ground. This means that
uniquing collisions on changing operands cause nodes to become
"distinct". (This already happened fairly commonly, whenever an
operand went to null.)
If you're constructing complex (non self-reference) `MDNode` cycles,
you need to call `MDNode::resolveCycles()` on each node (or on a
top-level node that somehow references all of the nodes). Also,
don't do that. Metadata cycles (and the RAUW machinery needed to
construct them) are expensive.
- An `MDNode` can only refer to a `Constant` through a bridge called
`ConstantAsMetadata` (one of the subclasses of `ValueAsMetadata`).
As a side effect, accessing an operand of an `MDNode` that is known
to be, e.g., `ConstantInt`, takes three steps: first, cast from
`Metadata` to `ConstantAsMetadata`; second, extract the `Constant`;
third, cast down to `ConstantInt`.
The eventual goal is to introduce `MDInt`/`MDFloat`/etc. and have
metadata schema owners transition away from using `Constant`s when
the type isn't important (and they don't care about referring to
`GlobalValue`s).
In the meantime, I've added transitional API to the `mdconst`
namespace that matches semantics with the old code, in order to
avoid adding the error-prone three-step equivalent to every call
site. If your old code was:
MDNode *N = foo();
bar(isa <ConstantInt>(N->getOperand(0)));
baz(cast <ConstantInt>(N->getOperand(1)));
bak(cast_or_null <ConstantInt>(N->getOperand(2)));
bat(dyn_cast <ConstantInt>(N->getOperand(3)));
bay(dyn_cast_or_null<ConstantInt>(N->getOperand(4)));
you can trivially match its semantics with:
MDNode *N = foo();
bar(mdconst::hasa <ConstantInt>(N->getOperand(0)));
baz(mdconst::extract <ConstantInt>(N->getOperand(1)));
bak(mdconst::extract_or_null <ConstantInt>(N->getOperand(2)));
bat(mdconst::dyn_extract <ConstantInt>(N->getOperand(3)));
bay(mdconst::dyn_extract_or_null<ConstantInt>(N->getOperand(4)));
and when you transition your metadata schema to `MDInt`:
MDNode *N = foo();
bar(isa <MDInt>(N->getOperand(0)));
baz(cast <MDInt>(N->getOperand(1)));
bak(cast_or_null <MDInt>(N->getOperand(2)));
bat(dyn_cast <MDInt>(N->getOperand(3)));
bay(dyn_cast_or_null<MDInt>(N->getOperand(4)));
- A `CallInst` -- specifically, intrinsic instructions -- can refer to
metadata through a bridge called `MetadataAsValue`. This is a
subclass of `Value` where `getType()->isMetadataTy()`.
`MetadataAsValue` is the *only* class that can legally refer to a
`LocalAsMetadata`, which is a bridged form of non-`Constant` values
like `Argument` and `Instruction`. It can also refer to any other
`Metadata` subclass.
(I'll break all your testcases in a follow-up commit, when I propagate
this change to assembly.)
llvm-svn: 223802
2014-12-10 02:38:53 +08:00
|
|
|
DIBuilder Builder(*M, /*AllowUnresolved*/ false);
|
2014-03-04 04:06:11 +08:00
|
|
|
|
2015-11-20 03:53:05 +08:00
|
|
|
typedef std::pair<StringRef, unsigned> Location;
|
2016-10-25 02:23:51 +08:00
|
|
|
typedef SmallDenseMap<DIScope *, DILexicalBlockFile *, 1> ScopeMap;
|
|
|
|
typedef DenseMap<const BasicBlock *, ScopeMap> BBScopeMap;
|
2015-11-20 03:53:05 +08:00
|
|
|
typedef DenseMap<Location, BBScopeMap> LocationBBMap;
|
2016-03-01 02:59:48 +08:00
|
|
|
typedef DenseMap<Location, unsigned> LocationDiscriminatorMap;
|
2016-04-15 02:37:18 +08:00
|
|
|
typedef DenseSet<Location> LocationSet;
|
2015-04-14 08:35:42 +08:00
|
|
|
|
2015-11-20 03:53:05 +08:00
|
|
|
LocationBBMap LBM;
|
2016-03-01 02:59:48 +08:00
|
|
|
LocationDiscriminatorMap LDM;
|
2014-03-04 04:06:11 +08:00
|
|
|
|
2015-11-20 03:53:05 +08:00
|
|
|
// Traverse all instructions in the function. If the source line location
|
|
|
|
// of the instruction appears in other basic block, assign a new
|
|
|
|
// discriminator for this instruction.
|
|
|
|
for (BasicBlock &B : F) {
|
|
|
|
for (auto &I : B.getInstList()) {
|
2016-08-06 01:56:49 +08:00
|
|
|
if (isa<IntrinsicInst>(&I))
|
2015-11-20 03:53:05 +08:00
|
|
|
continue;
|
|
|
|
const DILocation *DIL = I.getDebugLoc();
|
|
|
|
if (!DIL)
|
|
|
|
continue;
|
2016-10-25 02:23:51 +08:00
|
|
|
DIScope *Scope = DIL->getScope();
|
2015-11-20 03:53:05 +08:00
|
|
|
Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
|
|
|
|
auto &BBMap = LBM[L];
|
2016-10-25 02:23:51 +08:00
|
|
|
auto R = BBMap.insert({&B, ScopeMap()});
|
2015-11-20 03:53:05 +08:00
|
|
|
if (BBMap.size() == 1)
|
|
|
|
continue;
|
|
|
|
bool InsertSuccess = R.second;
|
2016-10-25 02:23:51 +08:00
|
|
|
ScopeMap &Scopes = R.first->second;
|
|
|
|
// If we could insert more than one block with the same line+file, a
|
2015-11-20 03:53:05 +08:00
|
|
|
// discriminator is needed to distinguish both instructions.
|
2016-10-25 02:23:51 +08:00
|
|
|
auto R1 = Scopes.insert({Scope, nullptr});
|
|
|
|
DILexicalBlockFile *&NewScope = R1.first->second;
|
|
|
|
if (!NewScope) {
|
2016-10-26 00:50:27 +08:00
|
|
|
unsigned Discriminator = InsertSuccess ? ++LDM[L] : LDM[L];
|
2016-10-25 02:23:51 +08:00
|
|
|
auto *File = Builder.createFile(DIL->getFilename(),
|
|
|
|
Scope->getDirectory());
|
|
|
|
NewScope = Builder.createLexicalBlockFile(Scope, File, Discriminator);
|
2014-03-04 04:06:11 +08:00
|
|
|
}
|
2015-11-20 03:53:05 +08:00
|
|
|
I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(),
|
|
|
|
NewScope, DIL->getInlinedAt()));
|
|
|
|
DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
|
2015-11-20 04:29:27 +08:00
|
|
|
<< DIL->getColumn() << ":"
|
|
|
|
<< dyn_cast<DILexicalBlockFile>(NewScope)->getDiscriminator()
|
2015-11-20 03:53:05 +08:00
|
|
|
<< I << "\n");
|
|
|
|
Changed = true;
|
2014-03-04 04:06:11 +08:00
|
|
|
}
|
|
|
|
}
|
2015-11-10 01:30:38 +08:00
|
|
|
|
|
|
|
// Traverse all instructions and assign new discriminators to call
|
|
|
|
// instructions with the same lineno that are in the same basic block.
|
|
|
|
// Sample base profile needs to distinguish different function calls within
|
|
|
|
// a same source line for correct profile annotation.
|
|
|
|
for (BasicBlock &B : F) {
|
2016-04-15 02:37:18 +08:00
|
|
|
LocationSet CallLocations;
|
2015-11-10 01:30:38 +08:00
|
|
|
for (auto &I : B.getInstList()) {
|
|
|
|
CallInst *Current = dyn_cast<CallInst>(&I);
|
2016-08-06 01:56:49 +08:00
|
|
|
if (!Current || isa<IntrinsicInst>(&I))
|
2015-11-16 18:40:38 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
DILocation *CurrentDIL = Current->getDebugLoc();
|
2016-04-15 03:46:38 +08:00
|
|
|
if (!CurrentDIL)
|
|
|
|
continue;
|
2016-04-15 02:37:18 +08:00
|
|
|
Location L =
|
|
|
|
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
|
|
|
|
if (!CallLocations.insert(L).second) {
|
|
|
|
auto *Scope = CurrentDIL->getScope();
|
|
|
|
auto *File = Builder.createFile(CurrentDIL->getFilename(),
|
|
|
|
Scope->getDirectory());
|
|
|
|
auto *NewScope = Builder.createLexicalBlockFile(Scope, File, ++LDM[L]);
|
|
|
|
Current->setDebugLoc(DILocation::get(Ctx, CurrentDIL->getLine(),
|
|
|
|
CurrentDIL->getColumn(), NewScope,
|
|
|
|
CurrentDIL->getInlinedAt()));
|
|
|
|
Changed = true;
|
2015-11-10 01:30:38 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-03-04 04:06:11 +08:00
|
|
|
return Changed;
|
|
|
|
}
|
2016-06-16 05:51:30 +08:00
|
|
|
|
|
|
|
// Legacy pass entry point: forwards to addDiscriminators() and reports
// whether the function was modified.
bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
  const bool Changed = addDiscriminators(F);
  return Changed;
}
|
|
|
|
// New pass manager entry point: runs addDiscriminators() on F. All analyses
// are reported as preserved when nothing changed, none otherwise.
PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  const bool Changed = addDiscriminators(F);

  // FIXME: should be all()
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
|