CodeGen: Initial instrumentation based PGO implementation

llvm-svn: 198640
This commit is contained in:
Justin Bogner 2014-01-06 22:27:43 +00:00
parent 6a9d2cfadd
commit ef512b9929
20 changed files with 1709 additions and 58 deletions

View File

@ -31,7 +31,8 @@ RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end");
CodeGenFunction::ConditionalEvaluation eval(CGF);
CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock);
CGF.EmitBranchOnBoolExpr(E->getConfig(), ContBlock, ConfigOKBlock,
/*TrueCount=*/0);
eval.begin(CGF);
CGF.EmitBlock(ConfigOKBlock);

View File

@ -2184,6 +2184,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
call->setCallingConv(getRuntimeCC());
Builder.CreateUnreachable();
}
PGO.setCurrentRegionCount(0);
}
/// Emits a call or invoke instruction to the given nullary runtime

View File

@ -1294,6 +1294,10 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
// Initialize the catch variable and set up the cleanups.
BeginCatch(*this, C);
// Emit the PGO counter increment
RegionCounter CatchCnt = getPGORegionCounter(C);
CatchCnt.beginRegion(Builder);
// Perform the body of the catch.
EmitStmt(C->getHandlerBlock());
@ -1320,7 +1324,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
Builder.CreateBr(ContBB);
}
RegionCounter ContCnt = getPGORegionCounter(&S);
EmitBlock(ContBB);
ContCnt.beginRegion(Builder);
}
namespace {

View File

@ -2651,6 +2651,7 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
}
OpaqueValueMapping binding(*this, expr);
RegionCounter Cnt = getPGORegionCounter(expr);
const Expr *condExpr = expr->getCond();
bool CondExprBool;
@ -2658,8 +2659,12 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
const Expr *live = expr->getTrueExpr(), *dead = expr->getFalseExpr();
if (!CondExprBool) std::swap(live, dead);
if (!ContainsLabel(dead))
if (!ContainsLabel(dead)) {
// If the true case is live, we need to track its region
if (CondExprBool)
Cnt.beginRegion(Builder);
return EmitLValue(live);
}
}
llvm::BasicBlock *lhsBlock = createBasicBlock("cond.true");
@ -2667,13 +2672,15 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
llvm::BasicBlock *contBlock = createBasicBlock("cond.end");
ConditionalEvaluation eval(*this);
EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock);
EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, Cnt.getCount());
// Any temporaries created here are conditional.
EmitBlock(lhsBlock);
Cnt.beginRegion(Builder);
eval.begin(*this);
LValue lhs = EmitLValue(expr->getTrueExpr());
eval.end(*this);
Cnt.adjustFallThroughCount();
if (!lhs.isSimple())
return EmitUnsupportedLValue(expr, "conditional operator");
@ -2683,14 +2690,17 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) {
// Any temporaries created here are conditional.
EmitBlock(rhsBlock);
Cnt.beginElseRegion();
eval.begin(*this);
LValue rhs = EmitLValue(expr->getFalseExpr());
eval.end(*this);
Cnt.adjustFallThroughCount();
if (!rhs.isSimple())
return EmitUnsupportedLValue(expr, "conditional operator");
rhsBlock = Builder.GetInsertBlock();
EmitBlock(contBlock);
Cnt.applyAdjustmentsToRegion();
llvm::PHINode *phi = Builder.CreatePHI(lhs.getAddress()->getType(), 2,
"cond-lvalue");

View File

@ -892,15 +892,18 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
RegionCounter Cnt = CGF.getPGORegionCounter(E);
CodeGenFunction::ConditionalEvaluation eval(CGF);
CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
// Save whether the destination's lifetime is externally managed.
bool isExternallyDestructed = Dest.isExternallyDestructed();
eval.begin(CGF);
CGF.EmitBlock(LHSBlock);
Cnt.beginRegion(Builder);
Visit(E->getTrueExpr());
Cnt.adjustFallThroughCount();
eval.end(CGF);
assert(CGF.HaveInsertPoint() && "expression evaluation ended with no IP!");
@ -914,10 +917,13 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
eval.begin(CGF);
CGF.EmitBlock(RHSBlock);
Cnt.beginElseRegion();
Visit(E->getFalseExpr());
Cnt.adjustFallThroughCount();
eval.end(CGF);
CGF.EmitBlock(ContBlock);
Cnt.applyAdjustmentsToRegion();
}
void AggExprEmitter::VisitChooseExpr(const ChooseExpr *CE) {

View File

@ -752,22 +752,28 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
RegionCounter Cnt = CGF.getPGORegionCounter(E);
CodeGenFunction::ConditionalEvaluation eval(CGF);
CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock);
CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
eval.begin(CGF);
CGF.EmitBlock(LHSBlock);
Cnt.beginRegion(Builder);
ComplexPairTy LHS = Visit(E->getTrueExpr());
Cnt.adjustFallThroughCount();
LHSBlock = Builder.GetInsertBlock();
CGF.EmitBranch(ContBlock);
eval.end(CGF);
eval.begin(CGF);
CGF.EmitBlock(RHSBlock);
Cnt.beginElseRegion();
ComplexPairTy RHS = Visit(E->getFalseExpr());
Cnt.adjustFallThroughCount();
RHSBlock = Builder.GetInsertBlock();
CGF.EmitBlock(ContBlock);
eval.end(CGF);
Cnt.applyAdjustmentsToRegion();
// Create a PHI node for the real part.
llvm::PHINode *RealPN = Builder.CreatePHI(LHS.first->getType(), 2, "cond.r");

View File

@ -2874,8 +2874,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) {
}
Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
RegionCounter Cnt = CGF.getPGORegionCounter(E);
// Perform vector logical and on comparisons with zero vectors.
if (E->getType()->isVectorType()) {
Cnt.beginRegion(Builder);
Value *LHS = Visit(E->getLHS());
Value *RHS = Visit(E->getRHS());
Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
@ -2897,6 +2901,8 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
bool LHSCondVal;
if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
if (LHSCondVal) { // If we have 1 && X, just emit X.
Cnt.beginRegion(Builder);
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
// ZExt result to int or bool.
return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "land.ext");
@ -2913,7 +2919,7 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
CodeGenFunction::ConditionalEvaluation eval(CGF);
// Branch on the LHS first. If it is false, go to the failure (cont) block.
CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock);
CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock, Cnt.getCount());
// Any edges into the ContBlock are now from an (indeterminate number of)
// edges from this first condition. All of these values will be false. Start
@ -2926,7 +2932,9 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
eval.begin(CGF);
CGF.EmitBlock(RHSBlock);
Cnt.beginRegion(Builder);
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
Cnt.adjustFallThroughCount();
eval.end(CGF);
// Reaquire the RHS block, as there may be subblocks inserted.
@ -2939,14 +2947,19 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
CGF.EmitBlock(ContBlock);
PN->addIncoming(RHSCond, RHSBlock);
Cnt.applyAdjustmentsToRegion();
// ZExt result to int.
return Builder.CreateZExtOrBitCast(PN, ResTy, "land.ext");
}
Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
RegionCounter Cnt = CGF.getPGORegionCounter(E);
// Perform vector logical or on comparisons with zero vectors.
if (E->getType()->isVectorType()) {
Cnt.beginRegion(Builder);
Value *LHS = Visit(E->getLHS());
Value *RHS = Visit(E->getRHS());
Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType());
@ -2968,6 +2981,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
bool LHSCondVal;
if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
if (!LHSCondVal) { // If we have 0 || X, just emit X.
Cnt.beginRegion(Builder);
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
// ZExt result to int or bool.
return Builder.CreateZExtOrBitCast(RHSCond, ResTy, "lor.ext");
@ -2984,7 +2999,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
CodeGenFunction::ConditionalEvaluation eval(CGF);
// Branch on the LHS first. If it is true, go to the success (cont) block.
CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock);
CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock,
Cnt.getParentCount() - Cnt.getCount());
// Any edges into the ContBlock are now from an (indeterminate number of)
// edges from this first condition. All of these values will be true. Start
@ -2999,7 +3015,9 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
// Emit the RHS condition as a bool value.
CGF.EmitBlock(RHSBlock);
Cnt.beginRegion(Builder);
Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
Cnt.adjustFallThroughCount();
eval.end(CGF);
@ -3010,6 +3028,7 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
// into the phi node for the edge with the value of RHSCond.
CGF.EmitBlock(ContBlock);
PN->addIncoming(RHSCond, RHSBlock);
Cnt.applyAdjustmentsToRegion();
// ZExt result to int.
return Builder.CreateZExtOrBitCast(PN, ResTy, "lor.ext");
@ -3049,6 +3068,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// Bind the common expression if necessary.
CodeGenFunction::OpaqueValueMapping binding(CGF, E);
RegionCounter Cnt = CGF.getPGORegionCounter(E);
Expr *condExpr = E->getCond();
Expr *lhsExpr = E->getTrueExpr();
@ -3063,6 +3083,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// If the dead side doesn't have labels we need, just emit the Live part.
if (!CGF.ContainsLabel(dead)) {
if (CondExprBool)
Cnt.beginRegion(Builder);
Value *Result = Visit(live);
// If the live part is a throw expression, it acts like it has a void
@ -3079,6 +3101,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// the select function.
if (CGF.getLangOpts().OpenCL
&& condExpr->getType()->isVectorType()) {
Cnt.beginRegion(Builder);
llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
llvm::Value *LHS = Visit(lhsExpr);
llvm::Value *RHS = Visit(rhsExpr);
@ -3122,6 +3146,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
// safe to evaluate the LHS and RHS unconditionally.
if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) &&
isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) {
Cnt.beginRegion(Builder);
llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr);
llvm::Value *LHS = Visit(lhsExpr);
llvm::Value *RHS = Visit(rhsExpr);
@ -3138,23 +3164,28 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end");
CodeGenFunction::ConditionalEvaluation eval(CGF);
CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock);
CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock, Cnt.getCount());
CGF.EmitBlock(LHSBlock);
Cnt.beginRegion(Builder);
eval.begin(CGF);
Value *LHS = Visit(lhsExpr);
eval.end(CGF);
Cnt.adjustFallThroughCount();
LHSBlock = Builder.GetInsertBlock();
Builder.CreateBr(ContBlock);
CGF.EmitBlock(RHSBlock);
Cnt.beginElseRegion();
eval.begin(CGF);
Value *RHS = Visit(rhsExpr);
eval.end(CGF);
Cnt.adjustFallThroughCount();
RHSBlock = Builder.GetInsertBlock();
CGF.EmitBlock(ContBlock);
Cnt.applyAdjustmentsToRegion();
// If the LHS or RHS is a throw expression, it will be legitimately null.
if (!LHS)

View File

@ -1523,10 +1523,13 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
llvm::Value *initialMutations =
Builder.CreateLoad(StateMutationsPtr, "forcoll.initial-mutations");
RegionCounter Cnt = getPGORegionCounter(&S);
// Start looping. This is the point we return to whenever we have a
// fresh, non-empty batch of objects.
llvm::BasicBlock *LoopBodyBB = createBasicBlock("forcoll.loopbody");
EmitBlock(LoopBodyBB);
Cnt.beginRegion(Builder);
// The current index into the buffer.
llvm::PHINode *index = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.index");
@ -1623,7 +1626,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
EmitAutoVarCleanups(variable);
// Perform the loop body, setting up break and continue labels.
BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody));
BreakContinueStack.push_back(BreakContinue(LoopEnd, AfterBody, &Cnt));
{
RunCleanupsScope Scope(*this);
EmitStmt(S.getBody());
@ -1642,6 +1645,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
llvm::Value *indexPlusOne
= Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1));
// TODO: We should probably model this as a "continue" for PGO
// If we haven't overrun the buffer yet, we can continue.
Builder.CreateCondBr(Builder.CreateICmpULT(indexPlusOne, count),
LoopBodyBB, FetchMoreBB);
@ -1665,6 +1669,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
index->addIncoming(zero, Builder.GetInsertBlock());
count->addIncoming(refetchCount, Builder.GetInsertBlock());
// TODO: We should be applying PGO weights here, but this needs to handle the
// branch before FetchMoreBB or we risk getting the numbers wrong.
Builder.CreateCondBr(Builder.CreateICmpEQ(refetchCount, zero),
EmptyBB, LoopBodyBB);
@ -1687,6 +1693,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
PopCleanupBlock();
EmitBlock(LoopEnd.getBlock());
// TODO: Once we calculate PGO weights above, set the region count here
}
void CodeGenFunction::EmitObjCAtTryStmt(const ObjCAtTryStmt &S) {

View File

@ -358,7 +358,9 @@ void CodeGenFunction::EmitLabel(const LabelDecl *D) {
ResolveBranchFixups(Dest.getBlock());
}
RegionCounter Cnt = getPGORegionCounter(D->getStmt());
EmitBlock(Dest.getBlock());
Cnt.beginRegion(Builder);
}
/// Change the cleanup scope of the labels in this lexical scope to
@ -402,12 +404,14 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) {
EmitStopPoint(&S);
EmitBranchThroughCleanup(getJumpDestForLabel(S.getLabel()));
PGO.setCurrentRegionCount(0);
}
void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
if (const LabelDecl *Target = S.getConstantTarget()) {
EmitBranchThroughCleanup(getJumpDestForLabel(Target));
PGO.setCurrentRegionCount(0);
return;
}
@ -424,12 +428,14 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
cast<llvm::PHINode>(IndGotoBB->begin())->addIncoming(V, CurBB);
EmitBranch(IndGotoBB);
PGO.setCurrentRegionCount(0);
}
void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// C99 6.8.4.1: The first substatement is executed if the expression compares
// unequal to 0. The condition must be a scalar type.
LexicalScope ConditionScope(*this, S.getSourceRange());
RegionCounter Cnt = getPGORegionCounter(&S);
if (S.getConditionVariable())
EmitAutoVarDecl(*S.getConditionVariable());
@ -447,6 +453,8 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// If the skipped block has no labels in it, just emit the executed block.
// This avoids emitting dead code and simplifies the CFG substantially.
if (!ContainsLabel(Skipped)) {
if (CondConstant)
Cnt.beginRegion(Builder);
if (Executed) {
RunCleanupsScope ExecutedScope(*this);
EmitStmt(Executed);
@ -462,14 +470,17 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
llvm::BasicBlock *ElseBlock = ContBlock;
if (S.getElse())
ElseBlock = createBasicBlock("if.else");
EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock);
EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Cnt.getCount());
// Emit the 'then' code.
EmitBlock(ThenBlock);
EmitBlock(ThenBlock);
Cnt.beginRegion(Builder);
{
RunCleanupsScope ThenScope(*this);
EmitStmt(S.getThen());
}
Cnt.adjustFallThroughCount();
EmitBranch(ContBlock);
// Emit the 'else' code if present.
@ -478,10 +489,12 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
if (getDebugInfo())
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
EmitBlock(ElseBlock);
Cnt.beginElseRegion();
{
RunCleanupsScope ElseScope(*this);
EmitStmt(Else);
}
Cnt.adjustFallThroughCount();
// There is no need to emit line number for unconditional branch.
if (getDebugInfo())
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
@ -490,9 +503,12 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
// Emit the continuation block for code after the if.
EmitBlock(ContBlock, true);
Cnt.applyAdjustmentsToRegion();
}
void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
RegionCounter Cnt = getPGORegionCounter(&S);
// Emit the header for the loop, which will also become
// the continue target.
JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
@ -503,7 +519,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
JumpDest LoopExit = getJumpDestInCurrentScope("while.end");
// Store the blocks to use for break and continue.
BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader));
BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader, &Cnt));
// C++ [stmt.while]p2:
// When the condition of a while statement is a declaration, the
@ -525,6 +541,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
// while(1) is common, avoid extra exit blocks. Be sure
// to correctly handle break/continue though.
bool EmitBoolCondBranch = true;
llvm::BranchInst *CondBr = NULL;
if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal))
if (C->isOne())
EmitBoolCondBranch = false;
@ -536,7 +553,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
if (ConditionScope.requiresCleanups())
ExitBlock = createBasicBlock("while.exit");
Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
CondBr = Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@ -549,11 +566,19 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
{
RunCleanupsScope BodyScope(*this);
EmitBlock(LoopBody);
Cnt.beginRegion(Builder);
EmitStmt(S.getBody());
}
Cnt.adjustFallThroughCount();
BreakContinueStack.pop_back();
uint64_t LoopCount = Cnt.getCount();
uint64_t ExitCount = Cnt.getLoopExitCount();
if (EmitBoolCondBranch)
CondBr->setMetadata(llvm::LLVMContext::MD_prof,
PGO.createBranchWeights(LoopCount, ExitCount));
// Immediately force cleanup.
ConditionScope.ForceCleanup();
@ -562,6 +587,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) {
// Emit the exit block.
EmitBlock(LoopExit.getBlock(), true);
PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
// The LoopHeader typically is just a branch if we skipped emitting
// a branch, try to erase it.
@ -573,16 +599,20 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) {
JumpDest LoopExit = getJumpDestInCurrentScope("do.end");
JumpDest LoopCond = getJumpDestInCurrentScope("do.cond");
RegionCounter Cnt = getPGORegionCounter(&S);
// Store the blocks to use for break and continue.
BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond));
BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond, &Cnt));
// Emit the body of the loop.
llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
EmitBlock(LoopBody);
Cnt.beginRegion(Builder);
{
RunCleanupsScope BodyScope(*this);
EmitStmt(S.getBody());
}
Cnt.adjustFallThroughCount();
BreakContinueStack.pop_back();
@ -603,12 +633,18 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) {
if (C->isZero())
EmitBoolCondBranch = false;
uint64_t LoopCount = Cnt.getCount() - Cnt.getParentCount();
uint64_t ExitCount = Cnt.getLoopExitCount();
// As long as the condition is true, iterate the loop.
if (EmitBoolCondBranch)
Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock());
if (EmitBoolCondBranch) {
Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(),
PGO.createBranchWeights(LoopCount, ExitCount));
}
// Emit the exit block.
EmitBlock(LoopExit.getBlock());
PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
// The DoCond block typically is just a branch if we skipped
// emitting a branch, try to erase it.
@ -617,6 +653,8 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S) {
}
void CodeGenFunction::EmitForStmt(const ForStmt &S) {
RegionCounter Cnt = getPGORegionCounter(&S);
JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
RunCleanupsScope ForScope(*this);
@ -639,6 +677,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
// Create a cleanup scope for the condition variable cleanups.
RunCleanupsScope ConditionScope(*this);
llvm::BranchInst *CondBr = NULL;
if (S.getCond()) {
// If the for statement has a condition scope, emit the local variable
// declaration.
@ -658,7 +697,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
// C99 6.8.5p2/p4: The first substatement is executed if the expression
// compares unequal to 0. The condition must be a scalar type.
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
CondBr = Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@ -670,6 +709,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
// Treat it as a non-zero constant. Don't even create a new block for the
// body, just fall into it.
}
Cnt.beginRegion(Builder);
// If the for loop doesn't have an increment we can just use the
// condition as the continue block. Otherwise we'll need to create
@ -679,7 +719,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
Continue = getJumpDestInCurrentScope("for.inc");
// Store the blocks to use for break and continue.
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
{
// Create a separate cleanup scope for the body, in case it is not
@ -693,9 +733,16 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
EmitBlock(Continue.getBlock());
EmitStmt(S.getInc());
}
Cnt.adjustFallThroughCount();
BreakContinueStack.pop_back();
uint64_t LoopCount = Cnt.getCount();
uint64_t ExitCount = Cnt.getLoopExitCount();
if (S.getCond())
CondBr->setMetadata(llvm::LLVMContext::MD_prof,
PGO.createBranchWeights(LoopCount, ExitCount));
ConditionScope.ForceCleanup();
EmitBranch(CondBlock);
@ -706,9 +753,12 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) {
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock(), true);
PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
}
void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
RegionCounter Cnt = getPGORegionCounter(&S);
JumpDest LoopExit = getJumpDestInCurrentScope("for.end");
RunCleanupsScope ForScope(*this);
@ -739,7 +789,8 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
// The body is executed if the expression, contextually converted
// to bool, is true.
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock);
llvm::BranchInst *CondBr = Builder.CreateCondBr(BoolCondVal,
ForBody, ExitBlock);
if (ExitBlock != LoopExit.getBlock()) {
EmitBlock(ExitBlock);
@ -747,12 +798,13 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
}
EmitBlock(ForBody);
Cnt.beginRegion(Builder);
// Create a block for the increment. In case of a 'continue', we jump there.
JumpDest Continue = getJumpDestInCurrentScope("for.inc");
// Store the blocks to use for break and continue.
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
BreakContinueStack.push_back(BreakContinue(LoopExit, Continue, &Cnt));
{
// Create a separate cleanup scope for the loop variable and body.
@ -764,9 +816,15 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
// If there is an increment, emit it next.
EmitBlock(Continue.getBlock());
EmitStmt(S.getInc());
Cnt.adjustFallThroughCount();
BreakContinueStack.pop_back();
uint64_t LoopCount = Cnt.getCount();
uint64_t ExitCount = Cnt.getLoopExitCount();
CondBr->setMetadata(llvm::LLVMContext::MD_prof,
PGO.createBranchWeights(LoopCount, ExitCount));
EmitBranch(CondBlock);
ForScope.ForceCleanup();
@ -776,6 +834,7 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) {
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock(), true);
PGO.setCurrentRegionCount(ExitCount + Cnt.getBreakCounter().getCount());
}
void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
@ -789,6 +848,7 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
/*init*/ true);
}
EmitBranchThroughCleanup(ReturnBlock);
PGO.setCurrentRegionCount(0);
}
/// EmitReturnStmt - Note that due to GCC extensions, this can have an operand
@ -860,6 +920,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
cleanupScope.ForceCleanup();
EmitBranchThroughCleanup(ReturnBlock);
PGO.setCurrentRegionCount(0);
}
void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) {
@ -882,8 +943,14 @@ void CodeGenFunction::EmitBreakStmt(const BreakStmt &S) {
if (HaveInsertPoint())
EmitStopPoint(&S);
JumpDest Block = BreakContinueStack.back().BreakBlock;
EmitBranchThroughCleanup(Block);
BreakContinue &BC = BreakContinueStack.back();
// We keep track of breaks from the loop so we can differentiate them from
// non-local exits in PGO instrumentation. This only applies to loops, not
// breaks from switch statements.
if (BC.CountBreak)
BC.LoopCnt->getBreakCounter().beginRegion(Builder);
EmitBranchThroughCleanup(BC.BreakBlock);
PGO.setCurrentRegionCount(0);
}
void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
@ -895,8 +962,12 @@ void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
if (HaveInsertPoint())
EmitStopPoint(&S);
JumpDest Block = BreakContinueStack.back().ContinueBlock;
EmitBranchThroughCleanup(Block);
BreakContinue &BC = BreakContinueStack.back();
// We keep track of continues in the loop so we can differentiate them from
// non-local exits in PGO instrumentation.
BC.LoopCnt->getContinueCounter().beginRegion(Builder);
EmitBranchThroughCleanup(BC.ContinueBlock);
PGO.setCurrentRegionCount(0);
}
/// EmitCaseStmtRange - If case statement range is not too big then
@ -908,11 +979,14 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) {
llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext());
RegionCounter CaseCnt = getPGORegionCounter(&S);
// Emit the code for this case. We do this first to make sure it is
// properly chained from our predecessor before generating the
// switch machinery to enter this block.
EmitBlock(createBasicBlock("sw.bb"));
llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
CaseCnt.beginRegion(Builder);
EmitStmt(S.getSubStmt());
// If range is empty, do nothing.
@ -923,7 +997,17 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) {
// FIXME: parameters such as this should not be hardcoded.
if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
// Range is small enough to add multiple switch instruction cases.
for (unsigned i = 0, e = Range.getZExtValue() + 1; i != e; ++i) {
uint64_t Total = CaseCnt.getCount() - CaseCnt.getParentCount();
unsigned NCases = Range.getZExtValue() + 1;
// Divide the weights evenly between the cases, ensuring that the total
// weight is preserved. Ie, a weight of 5 over three cases will be
// distributed as weights of 2, 2, and 1.
uint64_t Weight = Total / NCases, Rem = Total % NCases;
for (unsigned I = 0; I != NCases; ++I) {
if (SwitchWeights)
SwitchWeights->push_back(Weight + (Rem ? 1 : 0));
if (Rem)
Rem--;
SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
LHS++;
}
@ -948,7 +1032,19 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) {
Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS));
llvm::Value *Cond =
Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds");
Builder.CreateCondBr(Cond, CaseDest, FalseDest);
llvm::MDNode *Weights = 0;
if (SwitchWeights) {
uint64_t ThisCount = CaseCnt.getCount() - CaseCnt.getParentCount();
uint64_t DefaultCount = (*SwitchWeights)[0];
Weights = PGO.createBranchWeights(ThisCount, DefaultCount);
// Since we're chaining the switch default through each large case range, we
// need to update the weight for the default, ie, the first case, to include
// this case.
(*SwitchWeights)[0] += ThisCount;
}
Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights);
// Restore the appropriate insertion point.
if (RestoreBB)
@ -974,17 +1070,22 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
return;
}
RegionCounter CaseCnt = getPGORegionCounter(&S);
llvm::ConstantInt *CaseVal =
Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
// If the body of the case is just a 'break', and if there was no fallthrough,
// try to not emit an empty block.
if ((CGM.getCodeGenOpts().OptimizationLevel > 0) &&
// If the body of the case is just a 'break', try to not emit an empty block.
// If we're profiling or we're not optimizing, leave the block in for better
// debug and coverage analysis.
if (!CGM.getCodeGenOpts().ProfileInstrGenerate &&
CGM.getCodeGenOpts().OptimizationLevel > 0 &&
isa<BreakStmt>(S.getSubStmt())) {
JumpDest Block = BreakContinueStack.back().BreakBlock;
// Only do this optimization if there are no cleanups that need emitting.
if (isObviouslyBranchWithoutCleanups(Block)) {
if (SwitchWeights)
SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
SwitchInsn->addCase(CaseVal, Block.getBlock());
// If there was a fallthrough into this case, make sure to redirect it to
@ -999,6 +1100,9 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
EmitBlock(createBasicBlock("sw.bb"));
llvm::BasicBlock *CaseDest = Builder.GetInsertBlock();
if (SwitchWeights)
SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
CaseCnt.beginRegion(Builder);
SwitchInsn->addCase(CaseVal, CaseDest);
// Recursively emitting the statement is acceptable, but is not wonderful for
@ -1016,8 +1120,14 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
// Otherwise, iteratively add consecutive cases to this switch stmt.
while (NextCase && NextCase->getRHS() == 0) {
CurCase = NextCase;
llvm::ConstantInt *CaseVal =
llvm::ConstantInt *CaseVal =
Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));
CaseCnt = getPGORegionCounter(NextCase);
if (SwitchWeights)
SwitchWeights->push_back(CaseCnt.getCount() - CaseCnt.getParentCount());
CaseCnt.beginRegion(Builder);
SwitchInsn->addCase(CaseVal, CaseDest);
NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
}
@ -1030,7 +1140,22 @@ void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) {
llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest();
assert(DefaultBlock->empty() &&
"EmitDefaultStmt: Default block already defined?");
llvm::BasicBlock *SkipCountBB = 0;
if (CGM.getCodeGenOpts().ProfileInstrGenerate) {
// The PGO region here needs to count the number of times the edge occurs,
// so fallthrough into this case will jump past the region counter to the
// skipcount basic block.
SkipCountBB = createBasicBlock("skipcount");
EmitBranch(SkipCountBB);
}
EmitBlock(DefaultBlock);
RegionCounter Cnt = getPGORegionCounter(&S);
Cnt.beginRegion(Builder, /*AddIncomingFallThrough=*/true);
if (SkipCountBB)
EmitBlock(SkipCountBB);
EmitStmt(S.getSubStmt());
}
@ -1187,7 +1312,8 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S,
static bool FindCaseStatementsForValue(const SwitchStmt &S,
const llvm::APSInt &ConstantCondValue,
SmallVectorImpl<const Stmt*> &ResultStmts,
ASTContext &C) {
ASTContext &C,
const SwitchCase *&ResultCase) {
// First step, find the switch case that is being branched to. We can do this
// efficiently by scanning the SwitchCase list.
const SwitchCase *Case = S.getSwitchCaseList();
@ -1230,6 +1356,7 @@ static bool FindCaseStatementsForValue(const SwitchStmt &S,
// while (1) {
// case 4: ...
bool FoundCase = false;
ResultCase = Case;
return CollectStatementsForCase(S.getBody(), Case, FoundCase,
ResultStmts) != CSFC_Failure &&
FoundCase;
@ -1245,6 +1372,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// Handle nested switch statements.
llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
// See if we can constant fold the condition of the switch and therefore only
@ -1252,8 +1380,14 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
llvm::APSInt ConstantCondValue;
if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) {
SmallVector<const Stmt*, 4> CaseStmts;
const SwitchCase *Case = 0;
if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
getContext())) {
getContext(), Case)) {
PGO.setCurrentRegionCount(0);
if (Case) {
RegionCounter CaseCnt = getPGORegionCounter(Case);
CaseCnt.beginRegion(Builder);
}
RunCleanupsScope ExecutedScope(*this);
// At this point, we are no longer "within" a switch instance, so
@ -1265,6 +1399,8 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// specified series of statements and we're good.
for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i)
EmitStmt(CaseStmts[i]);
RegionCounter ExitCnt = getPGORegionCounter(&S);
ExitCnt.beginRegion(Builder);
// Now we want to restore the saved switch instance so that nested
// switches continue to function properly
@ -1282,18 +1418,41 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// failure.
llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default");
SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock);
if (PGO.haveRegionCounts()) {
// Walk the SwitchCase list to find how many there are.
uint64_t DefaultCount = 0;
unsigned NumCases = 0;
for (const SwitchCase *Case = S.getSwitchCaseList();
Case;
Case = Case->getNextSwitchCase()) {
if (isa<DefaultStmt>(Case))
DefaultCount = getPGORegionCounter(Case).getCount();
NumCases += 1;
}
SwitchWeights = new SmallVector<uint64_t, 16>();
SwitchWeights->reserve(NumCases);
// The default needs to be first. We store the edge count, so we already
// know the right weight.
SwitchWeights->push_back(DefaultCount);
}
CaseRangeBlock = DefaultBlock;
// Clear the insertion point to indicate we are in unreachable code.
Builder.ClearInsertionPoint();
PGO.setCurrentRegionCount(0);
// All break statements jump to NextBlock. If BreakContinueStack is non-empty
// then reuse last ContinueBlock.
// then reuse last ContinueBlock and that block's counter.
JumpDest OuterContinue;
if (!BreakContinueStack.empty())
OuterContinue = BreakContinueStack.back().ContinueBlock;
RegionCounter *OuterCount = 0;
if (!BreakContinueStack.empty()) {
BreakContinue &BC = BreakContinueStack.back();
OuterContinue = BC.ContinueBlock;
OuterCount = BC.LoopCnt;
}
BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue));
BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue,
OuterCount, /*CountBreak=*/false));
// Emit switch body.
EmitStmt(S.getBody());
@ -1322,8 +1481,20 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
// Emit continuation.
EmitBlock(SwitchExit.getBlock(), true);
RegionCounter ExitCnt = getPGORegionCounter(&S);
ExitCnt.beginRegion(Builder);
if (SwitchWeights) {
assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() &&
"switch weights do not match switch cases");
// If there's only one jump destination there's no sense weighting it.
if (SwitchWeights->size() > 1)
SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
PGO.createBranchWeights(*SwitchWeights));
delete SwitchWeights;
}
SwitchInsn = SavedSwitchInsn;
SwitchWeights = SavedSwitchWeights;
CaseRangeBlock = SavedCRBlock;
}

View File

@ -53,6 +53,7 @@ add_clang_library(clangCodeGen
CodeGenModule.cpp
CodeGenTBAA.cpp
CodeGenTypes.cpp
CodeGenPGO.cpp
ItaniumCXXABI.cpp
MicrosoftCXXABI.cpp
ModuleBuilder.cpp

View File

@ -16,6 +16,7 @@
#include "CGCXXABI.h"
#include "CGDebugInfo.h"
#include "CodeGenModule.h"
#include "CodeGenPGO.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
@ -44,7 +45,8 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext)
NextCleanupDestIndex(1), FirstBlockInfo(0), EHResumeBlock(0),
ExceptionSlot(0), EHSelectorSlot(0), DebugInfo(CGM.getModuleDebugInfo()),
DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(0),
SwitchInsn(0), CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
PGO(cgm), SwitchInsn(0), SwitchWeights(0),
CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
NumSimpleReturnExprs(0), CXXABIThisDecl(0), CXXABIThisValue(0),
CXXThisValue(0), CXXDefaultInitExprThis(0),
CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0),
@ -571,6 +573,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
if (CGM.getCodeGenOpts().InstrumentForProfiling)
EmitMCountInstrumentation();
PGO.assignRegionCounters(GD);
if (RetTy->isVoidType()) {
// Void type; nothing to return.
ReturnValue = 0;
@ -643,6 +647,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
const Stmt *Body) {
RegionCounter Cnt = getPGORegionCounter(Body);
Cnt.beginRegion(Builder);
if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
EmitCompoundStmtWithoutScope(*S);
else
@ -772,6 +778,9 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// a quick pass now to see if we can.
if (!CurFn->doesNotThrow())
TryMarkNoThrow(CurFn);
PGO.emitWriteoutFunction(CurGD);
PGO.destroyRegionCounters();
}
/// ContainsLabel - Return true if the statement contains a label in it. If
@ -870,10 +879,13 @@ ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &ResultInt) {
///
void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
llvm::BasicBlock *TrueBlock,
llvm::BasicBlock *FalseBlock) {
llvm::BasicBlock *FalseBlock,
uint64_t TrueCount) {
Cond = Cond->IgnoreParens();
if (const BinaryOperator *CondBOp = dyn_cast<BinaryOperator>(Cond)) {
RegionCounter Cnt = getPGORegionCounter(CondBOp);
// Handle X && Y in a condition.
if (CondBOp->getOpcode() == BO_LAnd) {
// If we have "1 && X", simplify the code. "0 && X" would have constant
@ -882,7 +894,9 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
ConstantBool) {
// br(1 && X) -> br(X).
return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
Cnt.beginRegion(Builder);
return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
TrueCount);
}
// If we have "X && 1", simplify the code to use an uncond branch.
@ -890,21 +904,28 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
ConstantBool) {
// br(X && 1) -> br(X).
return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
TrueCount);
}
// Emit the LHS as a conditional. If the LHS conditional is false, we
// want to jump to the FalseBlock.
llvm::BasicBlock *LHSTrue = createBasicBlock("land.lhs.true");
// The counter tells us how often we evaluate RHS, and all of TrueCount
// can be propagated to that branch.
uint64_t RHSCount = Cnt.getCount();
ConditionalEvaluation eval(*this);
EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock);
EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount);
EmitBlock(LHSTrue);
// Any temporaries created here are conditional.
Cnt.beginRegion(Builder);
eval.begin(*this);
EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount);
eval.end(*this);
Cnt.adjustFallThroughCount();
Cnt.applyAdjustmentsToRegion();
return;
}
@ -916,7 +937,9 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
!ConstantBool) {
// br(0 || X) -> br(X).
return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
Cnt.beginRegion(Builder);
return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
TrueCount);
}
// If we have "X || 0", simplify the code to use an uncond branch.
@ -924,21 +947,31 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (ConstantFoldsToSimpleInteger(CondBOp->getRHS(), ConstantBool) &&
!ConstantBool) {
// br(X || 0) -> br(X).
return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock);
return EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, FalseBlock,
TrueCount);
}
// Emit the LHS as a conditional. If the LHS conditional is true, we
// want to jump to the TrueBlock.
llvm::BasicBlock *LHSFalse = createBasicBlock("lor.lhs.false");
// We have the count for entry to the RHS and for the whole expression
// being true, so we can divy up True count between the short circuit and
// the RHS.
uint64_t LHSCount = TrueCount - Cnt.getCount();
uint64_t RHSCount = TrueCount - LHSCount;
ConditionalEvaluation eval(*this);
EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse);
EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount);
EmitBlock(LHSFalse);
// Any temporaries created here are conditional.
Cnt.beginRegion(Builder);
eval.begin(*this);
EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock);
EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount);
eval.end(*this);
Cnt.adjustFallThroughCount();
Cnt.applyAdjustmentsToRegion();
return;
}
@ -946,8 +979,13 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
if (const UnaryOperator *CondUOp = dyn_cast<UnaryOperator>(Cond)) {
// br(!x, t, f) -> br(x, f, t)
if (CondUOp->getOpcode() == UO_LNot)
return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock);
if (CondUOp->getOpcode() == UO_LNot) {
// Negate the count.
uint64_t FalseCount = PGO.getCurrentRegionCount() - TrueCount;
// Negate the condition and swap the destination blocks.
return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock,
FalseCount);
}
}
if (const ConditionalOperator *CondOp = dyn_cast<ConditionalOperator>(Cond)) {
@ -955,17 +993,33 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true");
llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false");
RegionCounter Cnt = getPGORegionCounter(CondOp);
ConditionalEvaluation cond(*this);
EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock);
EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
// When computing PGO branch weights, we only know the overall count for
// the true block. This code is essentially doing tail duplication of the
// naive code-gen, introducing new edges for which counts are not
// available. Divide the counts proportionally between the LHS and RHS of
// the conditional operator.
uint64_t LHSScaledTrueCount = 0;
if (TrueCount) {
double LHSRatio = Cnt.getCount() / (double) PGO.getCurrentRegionCount();
LHSScaledTrueCount = TrueCount * LHSRatio;
}
cond.begin(*this);
EmitBlock(LHSBlock);
EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock);
Cnt.beginRegion(Builder);
EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock,
LHSScaledTrueCount);
cond.end(*this);
cond.begin(*this);
EmitBlock(RHSBlock);
EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock);
Cnt.beginElseRegion();
EmitBranchOnBoolExpr(CondOp->getRHS(), TrueBlock, FalseBlock,
TrueCount - LHSScaledTrueCount);
cond.end(*this);
return;
@ -981,9 +1035,15 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
return;
}
// Create branch weights based on the number of times we get here and the
// number of times the condition should be true.
uint64_t CurrentCount = PGO.getCurrentRegionCountWithMin(TrueCount);
llvm::MDNode *Weights = PGO.createBranchWeights(TrueCount,
CurrentCount - TrueCount);
// Emit the code with the fully general case.
llvm::Value *CondV = EvaluateExprAsBool(Cond);
Builder.CreateCondBr(CondV, TrueBlock, FalseBlock);
Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights);
}
/// ErrorUnsupported - Print out an error that codegen doesn't support the

View File

@ -19,6 +19,7 @@
#include "CGValue.h"
#include "EHScopeStack.h"
#include "CodeGenModule.h"
#include "CodeGenPGO.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
@ -817,19 +818,36 @@ private:
llvm::DenseMap<const LabelDecl*, JumpDest> LabelMap;
// BreakContinueStack - This keeps track of where break and continue
// statements should jump to.
// statements should jump to and the associated base counter for
// instrumentation.
struct BreakContinue {
BreakContinue(JumpDest Break, JumpDest Continue)
: BreakBlock(Break), ContinueBlock(Continue) {}
BreakContinue(JumpDest Break, JumpDest Continue, RegionCounter *LoopCnt,
bool CountBreak = true)
: BreakBlock(Break), ContinueBlock(Continue), LoopCnt(LoopCnt),
CountBreak(CountBreak) {}
JumpDest BreakBlock;
JumpDest ContinueBlock;
RegionCounter *LoopCnt;
bool CountBreak;
};
SmallVector<BreakContinue, 8> BreakContinueStack;
CodeGenPGO PGO;
public:
/// Get a counter for instrumentation of the region associated with the given
/// statement.
RegionCounter getPGORegionCounter(const Stmt *S) {
return RegionCounter(PGO, S);
}
private:
/// SwitchInsn - This is nearest current switch instruction. It is null if
/// current context is not in a switch.
llvm::SwitchInst *SwitchInsn;
/// The branch weights of SwitchInsn when doing instrumentation based PGO.
SmallVector<uint64_t, 16> *SwitchWeights;
/// CaseRangeBlock - This block holds if condition check for last case
/// statement range in current switch instruction.
@ -2413,8 +2431,10 @@ public:
/// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an
/// if statement) to the specified blocks. Based on the condition, this might
/// try to simplify the codegen of the conditional based on the branch.
/// TrueCount should be the number of times we expect the condition to
/// evaluate to true based on PGO data.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock,
llvm::BasicBlock *FalseBlock);
llvm::BasicBlock *FalseBlock, uint64_t TrueCount);
/// \brief Emit a description of a type in a format suitable for passing to
/// a runtime sanitizer handler.

View File

@ -20,6 +20,7 @@
#include "CGOpenCLRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenTBAA.h"
#include "CodeGenPGO.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
@ -77,7 +78,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO,
ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0),
TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0),
OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0),
NoObjCARCExceptionsMetadata(0), RRData(0), CFConstantStringClassRef(0),
NoObjCARCExceptionsMetadata(0), RRData(0), PGOData(0),
CFConstantStringClassRef(0),
ConstantStringClassRef(0), NSConstantStringType(0),
NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0),
BlockObjectDispose(0), BlockDescriptorType(0), GenericBlockLiteralType(0),
@ -131,6 +133,9 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO,
if (C.getLangOpts().ObjCAutoRefCount)
ARCData = new ARCEntrypoints();
RRData = new RREntrypoints();
if (!CodeGenOpts.InstrProfileInput.empty())
PGOData = new PGOProfileData(*this, CodeGenOpts.InstrProfileInput);
}
CodeGenModule::~CodeGenModule() {
@ -2181,6 +2186,10 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
AddGlobalDtor(Fn, DA->getPriority());
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, Fn);
llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this);
if (PGOInit)
AddGlobalCtor(PGOInit, 0);
}
void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {

View File

@ -85,7 +85,8 @@ namespace CodeGen {
class CGCUDARuntime;
class BlockFieldFlags;
class FunctionArgList;
class PGOProfileData;
struct OrderGlobalInits {
unsigned int priority;
unsigned int lex_order;
@ -258,6 +259,7 @@ class CodeGenModule : public CodeGenTypeCache {
ARCEntrypoints *ARCData;
llvm::MDNode *NoObjCARCExceptionsMetadata;
RREntrypoints *RRData;
PGOProfileData *PGOData;
// WeakRefReferences - A set of references that have only been seen via
// a weakref so far. This is used to remove the weak of the reference if we
@ -479,6 +481,10 @@ public:
return *RRData;
}
PGOProfileData *getPGOData() const {
return PGOData;
}
llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) {
return StaticLocalDeclMap[D];
}

View File

@ -0,0 +1,456 @@
//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Instrumentation-based profile-guided optimization
//
//===----------------------------------------------------------------------===//
#include "CodeGenPGO.h"
#include "CodeGenFunction.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/StmtVisitor.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/FileSystem.h"
using namespace clang;
using namespace CodeGen;
static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) {
DiagnosticsEngine &Diags = CGM.getDiags();
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, Message);
Diags.Report(DiagID);
}
PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path)
: CGM(CGM) {
if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) {
ReportBadPGOData(CGM, "failed to open pgo data file");
return;
}
if (DataBuffer->getBufferSize() > std::numeric_limits<unsigned>::max()) {
ReportBadPGOData(CGM, "pgo data file too big");
return;
}
// Scan through the data file and map each function to the corresponding
// file offset where its counts are stored.
const char *BufferStart = DataBuffer->getBufferStart();
const char *BufferEnd = DataBuffer->getBufferEnd();
const char *CurPtr = BufferStart;
while (CurPtr < BufferEnd) {
// Read the mangled function name.
const char *FuncName = CurPtr;
// FIXME: Something will need to be added to distinguish static functions.
CurPtr = strchr(CurPtr, ' ');
if (!CurPtr) {
ReportBadPGOData(CGM, "pgo data file has malformed function entry");
return;
}
StringRef MangledName(FuncName, CurPtr - FuncName);
// Read the number of counters.
char *EndPtr;
unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) {
ReportBadPGOData(CGM, "pgo data file has unexpected number of counters");
return;
}
CurPtr = EndPtr;
// There is one line for each counter; skip over those lines.
for (unsigned N = 0; N < NumCounters; ++N) {
CurPtr = strchr(++CurPtr, '\n');
if (!CurPtr) {
ReportBadPGOData(CGM, "pgo data file is missing some counter info");
return;
}
}
// Skip over the blank line separating functions.
CurPtr += 2;
DataOffsets[MangledName] = FuncName - BufferStart;
}
}
bool PGOProfileData::getFunctionCounts(StringRef MangledName,
std::vector<uint64_t> &Counts) {
// Find the relevant section of the pgo-data file.
llvm::StringMap<unsigned>::const_iterator OffsetIter =
DataOffsets.find(MangledName);
if (OffsetIter == DataOffsets.end())
return true;
const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue();
// Skip over the function name.
CurPtr = strchr(CurPtr, ' ');
assert(CurPtr && "pgo-data has corrupted function entry");
// Read the number of counters.
char *EndPtr;
unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 &&
"pgo-data file has corrupted number of counters");
CurPtr = EndPtr;
Counts.reserve(NumCounters);
for (unsigned N = 0; N < NumCounters; ++N) {
// Read the count value.
uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
if (EndPtr == CurPtr || *EndPtr != '\n') {
ReportBadPGOData(CGM, "pgo-data file has bad count value");
return true;
}
Counts.push_back(Count);
CurPtr = EndPtr + 1;
}
// Make sure the number of counters matches up.
if (Counts.size() != NumCounters) {
ReportBadPGOData(CGM, "pgo-data file has inconsistent counters");
return true;
}
return false;
}
void CodeGenPGO::emitWriteoutFunction(GlobalDecl &GD) {
if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
return;
llvm::LLVMContext &Ctx = CGM.getLLVMContext();
llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx);
llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
llvm::Function *WriteoutF =
CGM.getModule().getFunction("__llvm_pgo_writeout");
if (!WriteoutF) {
llvm::FunctionType *WriteoutFTy =
llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
WriteoutF = llvm::Function::Create(WriteoutFTy,
llvm::GlobalValue::InternalLinkage,
"__llvm_pgo_writeout", &CGM.getModule());
}
WriteoutF->setUnnamedAddr(true);
WriteoutF->addFnAttr(llvm::Attribute::NoInline);
if (CGM.getCodeGenOpts().DisableRedZone)
WriteoutF->addFnAttr(llvm::Attribute::NoRedZone);
llvm::BasicBlock *BB = WriteoutF->empty() ?
llvm::BasicBlock::Create(Ctx, "", WriteoutF) : &WriteoutF->getEntryBlock();
CGBuilderTy PGOBuilder(BB);
llvm::Instruction *I = BB->getTerminator();
if (!I)
I = PGOBuilder.CreateRetVoid();
PGOBuilder.SetInsertPoint(I);
llvm::Type *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
llvm::Type *Args[] = {
Int8PtrTy, // const char *MangledName
Int32Ty, // uint32_t NumCounters
Int64PtrTy // uint64_t *Counters
};
llvm::FunctionType *FTy =
llvm::FunctionType::get(PGOBuilder.getVoidTy(), Args, false);
llvm::Constant *EmitFunc =
CGM.getModule().getOrInsertFunction("llvm_pgo_emit", FTy);
llvm::Constant *MangledName =
CGM.GetAddrOfConstantCString(CGM.getMangledName(GD), "__llvm_pgo_name");
MangledName = llvm::ConstantExpr::getBitCast(MangledName, Int8PtrTy);
PGOBuilder.CreateCall3(EmitFunc, MangledName,
PGOBuilder.getInt32(NumRegionCounters),
PGOBuilder.CreateBitCast(RegionCounters, Int64PtrTy));
}
llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
llvm::Function *WriteoutF =
CGM.getModule().getFunction("__llvm_pgo_writeout");
if (!WriteoutF)
return NULL;
// Create a small bit of code that registers the "__llvm_pgo_writeout" to
// be executed at exit.
llvm::Function *F = CGM.getModule().getFunction("__llvm_pgo_init");
if (F)
return NULL;
llvm::LLVMContext &Ctx = CGM.getLLVMContext();
llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
false);
F = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage,
"__llvm_pgo_init", &CGM.getModule());
F->setUnnamedAddr(true);
F->setLinkage(llvm::GlobalValue::InternalLinkage);
F->addFnAttr(llvm::Attribute::NoInline);
if (CGM.getCodeGenOpts().DisableRedZone)
F->addFnAttr(llvm::Attribute::NoRedZone);
llvm::BasicBlock *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F);
CGBuilderTy PGOBuilder(BB);
FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), false);
llvm::Type *Params[] = {
llvm::PointerType::get(FTy, 0)
};
FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), Params, false);
// Inialize the environment and register the local writeout function.
llvm::Constant *PGOInit =
CGM.getModule().getOrInsertFunction("llvm_pgo_init", FTy);
PGOBuilder.CreateCall(PGOInit, WriteoutF);
PGOBuilder.CreateRetVoid();
return F;
}
namespace {
/// A StmtVisitor that fills a map of statements to PGO counters.
struct MapRegionCounters : public ConstStmtVisitor<MapRegionCounters> {
/// The next counter value to assign.
unsigned NextCounter;
/// The map of statements to counters.
llvm::DenseMap<const Stmt*, unsigned> *CounterMap;
MapRegionCounters(llvm::DenseMap<const Stmt*, unsigned> *CounterMap) :
NextCounter(0), CounterMap(CounterMap) {
}
void VisitChildren(const Stmt *S) {
for (Stmt::const_child_range I = S->children(); I; ++I)
if (*I)
this->Visit(*I);
}
void VisitStmt(const Stmt *S) { VisitChildren(S); }
/// Assign a counter to track entry to the function body
void VisitFunctionDecl(const FunctionDecl *S) {
(*CounterMap)[S->getBody()] = NextCounter++;
Visit(S->getBody());
}
/// Assign a counter to track the block following a label
void VisitLabelStmt(const LabelStmt *S) {
(*CounterMap)[S] = NextCounter++;
Visit(S->getSubStmt());
}
/// Assign three counters - one for the body of the loop, one for breaks
/// from the loop, and one for continues.
///
/// The break and continue counters cover all such statements in this loop,
/// and are used in calculations to find the number of times the condition
/// and exit of the loop occur. They are needed so we can differentiate
/// these statements from non-local exits like return and goto.
void VisitWhileStmt(const WhileStmt *S) {
(*CounterMap)[S] = NextCounter;
NextCounter += 3;
Visit(S->getCond());
Visit(S->getBody());
}
/// Assign counters for the body of the loop, and for breaks and
/// continues. See VisitWhileStmt.
void VisitDoStmt(const DoStmt *S) {
(*CounterMap)[S] = NextCounter;
NextCounter += 3;
Visit(S->getBody());
Visit(S->getCond());
}
/// Assign counters for the body of the loop, and for breaks and
/// continues. See VisitWhileStmt.
void VisitForStmt(const ForStmt *S) {
(*CounterMap)[S] = NextCounter;
NextCounter += 3;
const Expr *E;
if ((E = S->getCond()))
Visit(E);
Visit(S->getBody());
if ((E = S->getInc()))
Visit(E);
}
/// Assign counters for the body of the loop, and for breaks and
/// continues. See VisitWhileStmt.
void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
(*CounterMap)[S] = NextCounter;
NextCounter += 3;
const Expr *E;
if ((E = S->getCond()))
Visit(E);
Visit(S->getBody());
if ((E = S->getInc()))
Visit(E);
}
/// Assign counters for the body of the loop, and for breaks and
/// continues. See VisitWhileStmt.
void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
(*CounterMap)[S] = NextCounter;
NextCounter += 3;
Visit(S->getElement());
Visit(S->getBody());
}
/// Assign a counter for the exit block of the switch statement.
void VisitSwitchStmt(const SwitchStmt *S) {
(*CounterMap)[S] = NextCounter++;
Visit(S->getCond());
Visit(S->getBody());
}
/// Assign a counter for a particular case in a switch. This counts jumps
/// from the switch header as well as fallthrough from the case before this
/// one.
void VisitCaseStmt(const CaseStmt *S) {
(*CounterMap)[S] = NextCounter++;
Visit(S->getSubStmt());
}
/// Assign a counter for the default case of a switch statement. The count
/// is the number of branches from the loop header to the default, and does
/// not include fallthrough from previous cases. If we have multiple
/// conditional branch blocks from the switch instruction to the default
/// block, as with large GNU case ranges, this is the counter for the last
/// edge in that series, rather than the first.
void VisitDefaultStmt(const DefaultStmt *S) {
(*CounterMap)[S] = NextCounter++;
Visit(S->getSubStmt());
}
/// Assign a counter for the "then" part of an if statement. The count for
/// the "else" part, if it exists, will be calculated from this counter.
void VisitIfStmt(const IfStmt *S) {
(*CounterMap)[S] = NextCounter++;
Visit(S->getCond());
Visit(S->getThen());
if (S->getElse())
Visit(S->getElse());
}
/// Assign a counter for the continuation block of a C++ try statement.
void VisitCXXTryStmt(const CXXTryStmt *S) {
(*CounterMap)[S] = NextCounter++;
Visit(S->getTryBlock());
for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
Visit(S->getHandler(I));
}
/// Assign a counter for a catch statement's handler block.
void VisitCXXCatchStmt(const CXXCatchStmt *S) {
(*CounterMap)[S] = NextCounter++;
Visit(S->getHandlerBlock());
}
/// Assign a counter for the "true" part of a conditional operator. The
/// count in the "false" part will be calculated from this counter.
void VisitConditionalOperator(const ConditionalOperator *E) {
(*CounterMap)[E] = NextCounter++;
Visit(E->getCond());
Visit(E->getTrueExpr());
Visit(E->getFalseExpr());
}
/// Assign a counter for the right hand side of a logical and operator.
void VisitBinLAnd(const BinaryOperator *E) {
(*CounterMap)[E] = NextCounter++;
Visit(E->getLHS());
Visit(E->getRHS());
}
/// Assign a counter for the right hand side of a logical or operator.
void VisitBinLOr(const BinaryOperator *E) {
(*CounterMap)[E] = NextCounter++;
Visit(E->getLHS());
Visit(E->getRHS());
}
};
}
void CodeGenPGO::assignRegionCounters(GlobalDecl &GD) {
bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
PGOProfileData *PGOData = CGM.getPGOData();
if (!InstrumentRegions && !PGOData)
return;
const Decl *D = GD.getDecl();
if (!D)
return;
mapRegionCounters(D);
if (InstrumentRegions)
emitCounterVariables();
if (PGOData)
loadRegionCounts(GD, PGOData);
}
void CodeGenPGO::mapRegionCounters(const Decl *D) {
RegionCounterMap = new llvm::DenseMap<const Stmt*, unsigned>();
MapRegionCounters Walker(RegionCounterMap);
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
Walker.VisitFunctionDecl(FD);
NumRegionCounters = Walker.NextCounter;
}
void CodeGenPGO::emitCounterVariables() {
llvm::LLVMContext &Ctx = CGM.getLLVMContext();
llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
NumRegionCounters);
RegionCounters =
new llvm::GlobalVariable(CGM.getModule(), CounterTy, false,
llvm::GlobalVariable::PrivateLinkage,
llvm::Constant::getNullValue(CounterTy),
"__llvm_pgo_ctr");
}
void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
return;
llvm::Value *Addr =
Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Addr);
}
void CodeGenPGO::loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData) {
// For now, ignore the counts from the PGO data file only if the number of
// counters does not match. This could be tightened down in the future to
// ignore counts when the input changes in various ways, e.g., by comparing a
// hash value based on some characteristics of the input.
RegionCounts = new std::vector<uint64_t>();
if (PGOData->getFunctionCounts(CGM.getMangledName(GD), *RegionCounts) ||
RegionCounts->size() != NumRegionCounters) {
delete RegionCounts;
RegionCounts = 0;
}
}
void CodeGenPGO::destroyRegionCounters() {
if (RegionCounterMap != 0)
delete RegionCounterMap;
if (RegionCounts != 0)
delete RegionCounts;
}
llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
uint64_t FalseCount) {
if (!TrueCount && !FalseCount)
return 0;
llvm::MDBuilder MDHelper(CGM.getLLVMContext());
// TODO: need to scale down to 32-bits
// According to Laplace's Rule of Succession, it is better to compute the
// weight based on the count plus 1.
return MDHelper.createBranchWeights(TrueCount + 1, FalseCount + 1);
}
llvm::MDNode *
CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
llvm::MDBuilder MDHelper(CGM.getLLVMContext());
// TODO: need to scale down to 32-bits, instead of just truncating.
// According to Laplace's Rule of Succession, it is better to compute the
// weight based on the count plus 1.
SmallVector<uint32_t, 16> ScaledWeights;
ScaledWeights.reserve(Weights.size());
for (ArrayRef<uint64_t>::iterator WI = Weights.begin(), WE = Weights.end();
WI != WE; ++WI) {
ScaledWeights.push_back(*WI + 1);
}
return MDHelper.createBranchWeights(ScaledWeights);
}

View File

@ -0,0 +1,216 @@
//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Instrumentation-based profile-guided optimization
//
//===----------------------------------------------------------------------===//
#ifndef CLANG_CODEGEN_CODEGENPGO_H
#define CLANG_CODEGEN_CODEGENPGO_H
#include "CGBuilder.h"
#include "CodeGenModule.h"
#include "CodeGenTypes.h"
#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/MemoryBuffer.h"
namespace clang {
namespace CodeGen {
class RegionCounter;
/// The raw counter data from an instrumented PGO binary
class PGOProfileData {
private:
/// The PGO data
llvm::OwningPtr<llvm::MemoryBuffer> DataBuffer;
/// Offsets into DataBuffer for each function's counters
llvm::StringMap<unsigned> DataOffsets;
CodeGenModule &CGM;
public:
PGOProfileData(CodeGenModule &CGM, std::string Path);
/// Fill Counts with the profile data for the given function name. Returns
/// false on success.
bool getFunctionCounts(StringRef MangledName, std::vector<uint64_t> &Counts);
};
/// Per-function PGO state. This class should generally not be used directly,
/// but instead through the CodeGenFunction and RegionCounter types.
class CodeGenPGO {
private:
CodeGenModule &CGM;
unsigned NumRegionCounters;
llvm::GlobalVariable *RegionCounters;
llvm::DenseMap<const Stmt*, unsigned> *RegionCounterMap;
std::vector<uint64_t> *RegionCounts;
uint64_t CurrentRegionCount;
public:
CodeGenPGO(CodeGenModule &CGM)
: CGM(CGM), NumRegionCounters(0), RegionCounters(0), RegionCounterMap(0),
RegionCounts(0), CurrentRegionCount(0) {}
~CodeGenPGO() {}
/// Whether or not we have PGO region data for the current function. This is
/// false both when we have no data at all and when our data has been
/// discarded.
bool haveRegionCounts() const { return RegionCounts != 0; }
/// Return the counter value of the current region.
uint64_t getCurrentRegionCount() const { return CurrentRegionCount; }
/// Return the counter value of the current region, or \p Min if it is larger.
uint64_t getCurrentRegionCountWithMin(uint64_t Min) {
return std::max(Min, CurrentRegionCount);
}
/// Set the counter value for the current region. This is used to keep track
/// of changes to the most recent counter from control flow and non-local
/// exits.
void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; }
/// Calculate branch weights appropriate for PGO data
llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount);
llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights);
/// Assign counters to regions and configure them for PGO of a given
/// function. Does nothing if instrumentation is not enabled and either
/// generates global variables or associates PGO data with each of the
/// counters depending on whether we are generating or using instrumentation.
void assignRegionCounters(GlobalDecl &GD);
/// Emit code to write counts for a given function to disk, if necessary.
void emitWriteoutFunction(GlobalDecl &GD);
/// Clean up region counter state. Must be called if assignRegionCounters is
/// used.
void destroyRegionCounters();
/// Emit the logic to register region counter write out functions. Returns a
/// function that implements this logic.
static llvm::Function *emitInitialization(CodeGenModule &CGM);
private:
void mapRegionCounters(const Decl *D);
void loadRegionCounts(GlobalDecl &GD, PGOProfileData *PGOData);
void emitCounterVariables();
/// Emit code to increment the counter at the given index
void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter);
/// Return the region counter for the given statement. This should only be
/// called on statements that have a dedicated counter.
unsigned getRegionCounter(const Stmt *S) {
if (RegionCounterMap == 0)
return 0;
return (*RegionCounterMap)[S];
}
/// Return the region count for the counter at the given index.
uint64_t getRegionCount(unsigned Counter) {
if (!haveRegionCounts())
return 0;
return (*RegionCounts)[Counter];
}
friend class RegionCounter;
};
/// A counter for a particular region. This is the primary interface through
/// which clients manage PGO counters and their values.
class RegionCounter {
CodeGenPGO *PGO;
unsigned Counter;
uint64_t Count;
uint64_t ParentCount;
uint64_t RegionCount;
int64_t Adjust;
RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex)
: PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)),
ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
public:
RegionCounter(CodeGenPGO &PGO, const Stmt *S)
: PGO(&PGO), Counter(PGO.getRegionCounter(S)),
Count(PGO.getRegionCount(Counter)),
ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
/// Get the value of the counter. In most cases this is the number of times
/// the region of the counter was entered, but for switch labels it's the
/// number of direct jumps to that label.
uint64_t getCount() const { return Count; }
/// Get the value of the counter with adjustments applied. Adjustments occur
/// when control enters or leaves the region abnormally, ie, if there is a
/// jump to a label within the region, or if the function can return from
/// within the region. The adjusted count, then, is the value of the counter
/// at the end of the region.
uint64_t getAdjustedCount() const {
assert(Adjust > 0 || (uint64_t)(-Adjust) <= Count && "Negative count");
return Count + Adjust;
}
/// Get the value of the counter in this region's parent, ie, the region that
/// was active when this region began. This is useful for deriving counts in
/// implicitly counted regions, like the false case of a condition or the
/// normal exits of a loop.
uint64_t getParentCount() const { return ParentCount; }
/// Get the number of times the condition of a loop will evaluate false. This
/// is the number of times we enter the loop, adjusted by the difference
/// between entering and exiting the loop body normally, excepting that
/// 'continue' statements also bring us back here.
///
/// Undefined if this counter is not counting a loop.
uint64_t getLoopExitCount() const {
return getParentCount() + getContinueCounter().getCount() +
getAdjustedCount() - getCount();
}
/// Get the associated break counter. Undefined if this counter is not
/// counting a loop.
RegionCounter getBreakCounter() const {
return RegionCounter(*PGO, Counter + 1);
}
/// Get the associated continue counter. Undefined if this counter is not
/// counting a loop.
RegionCounter getContinueCounter() const {
return RegionCounter(*PGO, Counter + 2);
}
/// Activate the counter by emitting an increment and starting to track
/// adjustments. If AddIncomingFallThrough is true, the current region count
/// will be added to the counter for the purposes of tracking the region.
void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) {
RegionCount = Count;
if (AddIncomingFallThrough)
RegionCount += PGO->getCurrentRegionCount();
PGO->setCurrentRegionCount(RegionCount);
PGO->emitCounterIncrement(Builder, Counter);
}
/// For counters on boolean branches, begins tracking adjustments for the
/// uncounted path.
void beginElseRegion() {
RegionCount = ParentCount - Count;
PGO->setCurrentRegionCount(RegionCount);
}
/// Control may either enter or leave the region, so the count at the end may
/// be different from the start. Call this to track that adjustment without
/// modifying the current count. Must not be called before one of beginRegion
/// or beginElseRegion.
void adjustFallThroughCount() {
Adjust += PGO->getCurrentRegionCount() - RegionCount;
}
/// Commit all adjustments to the current region. This should be called after
/// all blocks that adjust the fallthrough count have been emitted.
void applyAdjustmentsToRegion() {
PGO->setCurrentRegionCount(ParentCount + Adjust);
}
};
} // end namespace CodeGen
} // end namespace clang
#endif

View File

@ -0,0 +1,127 @@
simple_loops 10
1
100
0
0
100
0
0
76
0
0
conditionals 13
1
100
0
0
50
50
33
33
16
99
100
99
100
early_exits 13
1
0
51
1
25
1
25
1
26
0
0
1
0
jumps 30
1
1
0
0
0
1
0
0
0
0
1
0
1
2
3
2
0
0
0
3
0
1
1
1
10
0
0
0
10
9
switches 21
1
1
1
15
0
7
7
1
0
3
2
3
3
4
4
0
4
4
5
1
0
big_switch 19
1
32
0
0
32
1
0
2
1
11
11
1
1
15
15
1
1
2
2
no_usable_data 5
1
1
1
1
1
main 1
1

View File

@ -0,0 +1,428 @@
// Test that instrumentation based profiling feeds branch prediction
// correctly. This tests both generation of profile data and use of the same,
// and the input file for the -fprofile-instr-use case is expected to be result
// of running the program generated by the -fprofile-instr-generate case
// (excepting no_usable_data). As such, main() should call every function in
// this test.
// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s
// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s
// PGOGEN: @[[SLC:__llvm_pgo_ctr[0-9]*]] = private global [10 x i64] zeroinitializer
// PGOGEN: @[[IFC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
// PGOGEN: @[[EEC:__llvm_pgo_ctr[0-9]*]] = private global [13 x i64] zeroinitializer
// PGOGEN: @[[JMC:__llvm_pgo_ctr[0-9]*]] = private global [30 x i64] zeroinitializer
// PGOGEN: @[[SWC:__llvm_pgo_ctr[0-9]*]] = private global [21 x i64] zeroinitializer
// PGOGEN: @[[BSC:__llvm_pgo_ctr[0-9]*]] = private global [19 x i64] zeroinitializer
// PGOGEN: @[[NOC:__llvm_pgo_ctr[0-9]*]] = private global [2 x i64] zeroinitializer
// PGOGEN: @[[MAC:__llvm_pgo_ctr[0-9]*]] = private global [1 x i64] zeroinitializer
// PGOGEN-LABEL: @simple_loops()
// PGOUSE-LABEL: @simple_loops()
// PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 0
void simple_loops() {
int i;
// PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 1
// PGOUSE: br {{.*}} !prof ![[SL1:[0-9]+]]
for (i = 0; i < 100; ++i) {
}
// PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 4
// PGOUSE: br {{.*}} !prof ![[SL2:[0-9]+]]
while (i > 0)
i--;
// PGOGEN: store {{.*}} @[[SLC]], i64 0, i64 7
// PGOUSE: br {{.*}} !prof ![[SL3:[0-9]+]]
do {} while (i++ < 75);
// PGOGEN-NOT: store {{.*}} @[[SLC]],
// PGOUSE-NOT: br {{.*}} !prof ![0-9]+
}
// PGOGEN-LABEL: @conditionals()
// PGOUSE-LABEL: @conditionals()
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 0
void conditionals() {
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 1
// PGOUSE: br {{.*}} !prof ![[IF1:[0-9]+]]
for (int i = 0; i < 100; ++i) {
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 4
// PGOUSE: br {{.*}} !prof ![[IF2:[0-9]+]]
if (i % 2) {
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 5
// PGOUSE: br {{.*}} !prof ![[IF3:[0-9]+]]
if (i) {}
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 6
// PGOUSE: br {{.*}} !prof ![[IF4:[0-9]+]]
} else if (i % 3) {
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 7
// PGOUSE: br {{.*}} !prof ![[IF5:[0-9]+]]
if (i) {}
} else {
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 8
// PGOUSE: br {{.*}} !prof ![[IF6:[0-9]+]]
if (i) {}
}
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 10
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 9
// PGOUSE: br {{.*}} !prof ![[IF7:[0-9]+]]
if (1 && i) {}
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 12
// PGOGEN: store {{.*}} @[[IFC]], i64 0, i64 11
// PGOUSE: br {{.*}} !prof ![[IF8:[0-9]+]]
if (0 || i) {}
}
// PGOGEN-NOT: store {{.*}} @[EEC]],
// PGOUSE-NOT: br {{.*}} !prof ![0-9]+
}
// PGOGEN-LABEL: @early_exits()
// PGOUSE-LABEL: @early_exits()
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 0
void early_exits() {
int i = 0;
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 1
// PGOUSE: br {{.*}} !prof ![[EE1:[0-9]+]]
if (i) {}
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 2
// PGOUSE: br {{.*}} !prof ![[EE2:[0-9]+]]
while (i < 100) {
i++;
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 5
// PGOUSE: br {{.*}} !prof ![[EE3:[0-9]+]]
if (i > 50)
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 3
break;
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 6
// PGOUSE: br {{.*}} !prof ![[EE4:[0-9]+]]
if (i % 2)
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 4
continue;
}
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 7
// PGOUSE: br {{.*}} !prof ![[EE5:[0-9]+]]
if (i) {}
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 8
do {
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 11
// PGOUSE: br {{.*}} !prof ![[EE6:[0-9]+]]
if (i > 75)
return;
else
i++;
// PGOUSE: br {{.*}} !prof ![[EE7:[0-9]+]]
} while (i < 100);
// PGOGEN: store {{.*}} @[[EEC]], i64 0, i64 12
// Never reached -> no weights
if (i) {}
// PGOGEN-NOT: store {{.*}} @[[EEC]],
// PGOUSE-NOT: br {{.*}} !prof ![0-9]+
}
// PGOGEN-LABEL: @jumps()
// PGOUSE-LABEL: @jumps()
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 0
void jumps() {
int i;
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 1
// PGOUSE: br {{.*}} !prof ![[JM1:[0-9]+]]
for (i = 0; i < 2; ++i) {
goto outofloop;
// Never reached -> no weights
if (i) {}
}
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 5
outofloop:
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 6
// PGOUSE: br {{.*}} !prof ![[JM2:[0-9]+]]
if (i) {}
goto loop1;
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 7
// PGOUSE: br {{.*}} !prof ![[JM3:[0-9]+]]
while (i) {
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 10
loop1:
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 11
// PGOUSE: br {{.*}} !prof ![[JM4:[0-9]+]]
if (i) {}
}
goto loop2;
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 12
first:
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 13
second:
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 14
third:
i++;
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 15
// PGOUSE: br {{.*}} !prof ![[JM5:[0-9]+]]
if (i < 3)
goto loop2;
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 16
// PGOUSE: br {{.*}} !prof ![[JM6:[0-9]+]]
while (i < 3) {
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 19
loop2:
// PGOUSE: switch {{.*}} [
// PGOUSE: ], !prof ![[JM7:[0-9]+]]
switch (i) {
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 21
case 0:
goto first;
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 22
case 1:
goto second;
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 23
case 2:
goto third;
}
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 20
}
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 24
// PGOUSE: br {{.*}} !prof ![[JM8:[0-9]+]]
for (i = 0; i < 10; ++i) {
goto withinloop;
// never reached -> no weights
if (i) {}
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 28
withinloop:
// PGOGEN: store {{.*}} @[[JMC]], i64 0, i64 29
// PGOUSE: br {{.*}} !prof ![[JM9:[0-9]+]]
if (i) {}
}
// PGOGEN-NOT: store {{.*}} @[[JMC]],
// PGOUSE-NOT: br {{.*}} !prof ![0-9]+
}
// PGOGEN-LABEL: @switches()
// PGOUSE-LABEL: @switches()
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 0
void switches() {
static int weights[] = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5};
// No cases -> no weights
switch (weights[0]) {
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 2
default:
break;
}
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 1
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 3
// PGOUSE: br {{.*}} !prof ![[SW1:[0-9]+]]
for (int i = 0, len = sizeof(weights) / sizeof(weights[0]); i < len; ++i) {
// PGOUSE: switch {{.*}} [
// PGOUSE: ], !prof ![[SW2:[0-9]+]]
switch (i[weights]) {
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 7
case 1:
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 8
// PGOUSE: br {{.*}} !prof ![[SW3:[0-9]+]]
if (i) {}
// fallthrough
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 9
case 2:
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 10
// PGOUSE: br {{.*}} !prof ![[SW4:[0-9]+]]
if (i) {}
break;
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 11
case 3:
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 12
// PGOUSE: br {{.*}} !prof ![[SW5:[0-9]+]]
if (i) {}
continue;
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 13
case 4:
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 14
// PGOUSE: br {{.*}} !prof ![[SW6:[0-9]+]]
if (i) {}
// PGOUSE: switch {{.*}} [
// PGOUSE: ], !prof ![[SW7:[0-9]+]]
switch (i) {
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 16
case 6 ... 9:
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 17
// PGOUSE: br {{.*}} !prof ![[SW8:[0-9]+]]
if (i) {}
continue;
}
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 15
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 18
default:
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 19
// PGOUSE: br {{.*}} !prof ![[SW9:[0-9]+]]
if (i == len - 1)
return;
}
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 6
}
// PGOGEN: store {{.*}} @[[SWC]], i64 0, i64 20
// Never reached -> no weights
if (weights[0]) {}
// PGOGEN-NOT: store {{.*}} @[[SWC]],
// PGOUSE-NOT: br {{.*}} !prof ![0-9]+
}
// PGOGEN-LABEL: @big_switch()
// PGOUSE-LABEL: @big_switch()
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 0
void big_switch() {
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 1
// PGOUSE: br {{.*}} !prof ![[BS1:[0-9]+]]
for (int i = 0; i < 32; ++i) {
// PGOUSE: switch {{.*}} [
// PGOUSE: ], !prof ![[BS2:[0-9]+]]
switch (1 << i) {
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 5
case (1 << 0):
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 6
// PGOUSE: br {{.*}} !prof ![[BS3:[0-9]+]]
if (i) {}
// fallthrough
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 7
case (1 << 1):
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 8
// PGOUSE: br {{.*}} !prof ![[BS4:[0-9]+]]
if (i) {}
break;
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 9
case (1 << 2) ... (1 << 12):
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 10
// PGOUSE: br {{.*}} !prof ![[BS5:[0-9]+]]
if (i) {}
break;
// The branch for the large case range above appears after the case body
// PGOUSE: br {{.*}} !prof ![[BS6:[0-9]+]]
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 11
case (1 << 13):
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 12
// PGOUSE: br {{.*}} !prof ![[BS7:[0-9]+]]
if (i) {}
break;
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 13
case (1 << 14) ... (1 << 28):
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 14
// PGOUSE: br {{.*}} !prof ![[BS8:[0-9]+]]
if (i) {}
break;
// The branch for the large case range above appears after the case body
// PGOUSE: br {{.*}} !prof ![[BS9:[0-9]+]]
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 15
case (1 << 29) ... ((1 << 29) + 1):
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 16
// PGOUSE: br {{.*}} !prof ![[BS10:[0-9]+]]
if (i) {}
break;
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 17
default:
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 18
// PGOUSE: br {{.*}} !prof ![[BS11:[0-9]+]]
if (i) {}
break;
}
// PGOGEN: store {{.*}} @[[BSC]], i64 0, i64 4
}
// PGOGEN-NOT: store {{.*}} @[[BSC]],
// PGOUSE-NOT: br {{.*}} !prof ![0-9]+
// PGOUSE: ret void
}
// PGOGEN-LABEL: @no_usable_data()
// PGOUSE-LABEL: @no_usable_data()
// PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 0
void no_usable_data() {
// The input data for PGOUSE is deliberately invalid for this function, so
// that we can test that we reject and ignore it properly.
int i = 0;
// PGOGEN: store {{.*}} @[[NOC]], i64 0, i64 1
if (i) {}
// PGOGEN-NOT: store {{.*}} @[[NOC]],
// PGOUSE-NOT: br {{.*}} !prof ![0-9]+
}
// PGOUSE-DAG: ![[SL1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
// PGOUSE-DAG: ![[SL2]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
// PGOUSE-DAG: ![[SL3]] = metadata !{metadata !"branch_weights", i32 76, i32 2}
// PGOUSE-DAG: ![[EE1]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
// PGOUSE-DAG: ![[EE2]] = metadata !{metadata !"branch_weights", i32 52, i32 1}
// PGOUSE-DAG: ![[EE3]] = metadata !{metadata !"branch_weights", i32 2, i32 51}
// PGOUSE-DAG: ![[EE4]] = metadata !{metadata !"branch_weights", i32 26, i32 26}
// PGOUSE-DAG: ![[EE5]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
// PGOUSE-DAG: ![[EE6]] = metadata !{metadata !"branch_weights", i32 2, i32 26}
// PGOUSE-DAG: ![[EE7]] = metadata !{metadata !"branch_weights", i32 26, i32 1}
// PGOUSE-DAG: ![[IF1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
// PGOUSE-DAG: ![[IF2]] = metadata !{metadata !"branch_weights", i32 51, i32 51}
// PGOUSE-DAG: ![[IF3]] = metadata !{metadata !"branch_weights", i32 51, i32 1}
// PGOUSE-DAG: ![[IF4]] = metadata !{metadata !"branch_weights", i32 34, i32 18}
// PGOUSE-DAG: ![[IF5]] = metadata !{metadata !"branch_weights", i32 34, i32 1}
// PGOUSE-DAG: ![[IF6]] = metadata !{metadata !"branch_weights", i32 17, i32 2}
// PGOUSE-DAG: ![[IF7]] = metadata !{metadata !"branch_weights", i32 100, i32 2}
// PGOUSE-DAG: ![[IF8]] = metadata !{metadata !"branch_weights", i32 100, i32 2}
// PGOUSE-DAG: ![[JM1]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
// PGOUSE-DAG: ![[JM2]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
// PGOUSE-DAG: ![[JM3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
// PGOUSE-DAG: ![[JM4]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
// PGOUSE-DAG: ![[JM5]] = metadata !{metadata !"branch_weights", i32 3, i32 2}
// PGOUSE-DAG: ![[JM6]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
// PGOUSE-DAG: ![[JM7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2}
// PGOUSE-DAG: ![[JM8]] = metadata !{metadata !"branch_weights", i32 11, i32 2}
// PGOUSE-DAG: ![[JM9]] = metadata !{metadata !"branch_weights", i32 10, i32 2}
// PGOUSE-DAG: ![[SW1]] = metadata !{metadata !"branch_weights", i32 16, i32 1}
// PGOUSE-DAG: ![[SW2]] = metadata !{metadata !"branch_weights", i32 6, i32 2, i32 3, i32 4, i32 5}
// PGOUSE-DAG: ![[SW3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
// PGOUSE-DAG: ![[SW4]] = metadata !{metadata !"branch_weights", i32 3, i32 2}
// PGOUSE-DAG: ![[SW5]] = metadata !{metadata !"branch_weights", i32 4, i32 1}
// PGOUSE-DAG: ![[SW6]] = metadata !{metadata !"branch_weights", i32 5, i32 1}
// PGOUSE-DAG: ![[SW7]] = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 2, i32 2, i32 2}
// PGOUSE-DAG: ![[SW8]] = metadata !{metadata !"branch_weights", i32 5, i32 1}
// PGOUSE-DAG: ![[SW9]] = metadata !{metadata !"branch_weights", i32 2, i32 5}
// PGOUSE-DAG: ![[BS1]] = metadata !{metadata !"branch_weights", i32 33, i32 2}
// PGOUSE-DAG: ![[BS2]] = metadata !{metadata !"branch_weights", i32 29, i32 2, i32 2, i32 2, i32 2, i32 1}
// PGOUSE-DAG: ![[BS3]] = metadata !{metadata !"branch_weights", i32 1, i32 2}
// PGOUSE-DAG: ![[BS4]] = metadata !{metadata !"branch_weights", i32 2, i32 2}
// PGOUSE-DAG: ![[BS5]] = metadata !{metadata !"branch_weights", i32 12, i32 1}
// PGOUSE-DAG: ![[BS6]] = metadata !{metadata !"branch_weights", i32 12, i32 3}
// PGOUSE-DAG: ![[BS7]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
// PGOUSE-DAG: ![[BS8]] = metadata !{metadata !"branch_weights", i32 16, i32 1}
// PGOUSE-DAG: ![[BS9]] = metadata !{metadata !"branch_weights", i32 16, i32 14}
// PGOUSE-DAG: ![[BS10]] = metadata !{metadata !"branch_weights", i32 2, i32 1}
// PGOUSE-DAG: ![[BS11]] = metadata !{metadata !"branch_weights", i32 3, i32 1}
int main(int argc, const char *argv[]) {
simple_loops();
conditionals();
early_exits();
jumps();
switches();
big_switch();
no_usable_data();
return 0;
}

View File

@ -0,0 +1,16 @@
_Z6throwsv 11
1
100
0
0
100
66
33
17
50
33
100
main 1
1

View File

@ -0,0 +1,73 @@
// Test that instrumentation based profiling feeds branch prediction
// correctly. This tests both generation of profile data and use of the same,
// and the input file for the -fprofile-instr-use case is expected to be result
// of running the program generated by the -fprofile-instr-generate case. As
// such, main() should call every function in this test.
// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN %s
// RUN: %clangxx %s -o - -emit-llvm -S -fprofile-instr-generate | FileCheck -check-prefix=PGOGEN-EXC %s
// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE %s
// RUN: %clang %s -o - -emit-llvm -S -fprofile-instr-use=%S/Inputs/instr-profile.pgodata | FileCheck -check-prefix=PGOUSE-EXC %s
// PGOGEN: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer
// PGOGEN-EXC: @[[THC:__llvm_pgo_ctr[0-9]*]] = private global [11 x i64] zeroinitializer
// PGOGEN-LABEL: @_Z6throwsv()
// PGOUSE-LABEL: @_Z6throwsv()
// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 0
void throws() {
// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 1
// PGOUSE: br {{.*}} !prof ![[TH1:[0-9]+]]
for (int i = 0; i < 100; ++i) {
try {
// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 5
// PGOUSE: br {{.*}} !prof ![[TH2:[0-9]+]]
if (i % 3) {
// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 6
// PGOUSE: br {{.*}} !prof ![[TH3:[0-9]+]]
if (i < 50)
throw 1;
} else {
// The catch block may be emitted after the throw above, we can skip it
// by looking for an else block, but this will break if anyone puts an
// else in the catch
// PGOUSE: if.else{{.*}}:
// PGOGEN: if.else{{.*}}:
// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 7
// PGOUSE: br {{.*}} !prof ![[TH4:[0-9]+]]
if (i >= 50)
throw 0;
}
} catch (int e) {
// PGOUSE-EXC: catch{{.*}}:
// PGOGEN-EXC: catch{{.*}}:
// PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 8
// PGOGEN-EXC: store {{.*}} @[[THC]], i64 0, i64 9
// PGOUSE-EXC: br {{.*}} !prof ![[TH5:[0-9]+]]
if (e) {}
}
// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 4
// PGOGEN: store {{.*}} @[[THC]], i64 0, i64 10
// PGOUSE: br {{.*}} !prof ![[TH6:[0-9]+]]
if (i < 100) {}
}
// PGOUSE-NOT: br {{.*}} !prof ![0-9]+
// PGOUSE: ret void
}
// PGOUSE-DAG: ![[TH1]] = metadata !{metadata !"branch_weights", i32 101, i32 2}
// PGOUSE-DAG: ![[TH2]] = metadata !{metadata !"branch_weights", i32 67, i32 35}
// PGOUSE-DAG: ![[TH3]] = metadata !{metadata !"branch_weights", i32 34, i32 34}
// PGOUSE-DAG: ![[TH4]] = metadata !{metadata !"branch_weights", i32 18, i32 18}
// PGOUSE-EXC: ![[TH5]] = metadata !{metadata !"branch_weights", i32 34, i32 18}
// PGOUSE-DAG: ![[TH6]] = metadata !{metadata !"branch_weights", i32 101, i32 1}
int main(int argc, const char *argv[]) {
throws();
return 0;
}