2014-01-07 06:27:43 +08:00
|
|
|
//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Instrumentation-based profile-guided optimization
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#ifndef CLANG_CODEGEN_CODEGENPGO_H
|
|
|
|
#define CLANG_CODEGEN_CODEGENPGO_H
|
|
|
|
|
|
|
|
#include "CGBuilder.h"
|
|
|
|
#include "CodeGenModule.h"
|
|
|
|
#include "CodeGenTypes.h"
|
|
|
|
#include "clang/Frontend/CodeGenOptions.h"
|
|
|
|
#include "llvm/ADT/StringMap.h"
|
|
|
|
#include "llvm/Support/MemoryBuffer.h"
|
2014-03-09 19:36:40 +08:00
|
|
|
#include <memory>
|
2014-01-07 06:27:43 +08:00
|
|
|
|
|
|
|
namespace clang {
|
|
|
|
namespace CodeGen {
|
|
|
|
class RegionCounter;
|
|
|
|
|
|
|
|
/// Per-function PGO state. This class should generally not be used directly,
|
|
|
|
/// but instead through the CodeGenFunction and RegionCounter types.
|
|
|
|
class CodeGenPGO {
|
|
|
|
private:
|
|
|
|
CodeGenModule &CGM;
|
2014-03-27 03:26:05 +08:00
|
|
|
std::unique_ptr<std::string> PrefixedFuncName;
|
2014-03-18 05:18:30 +08:00
|
|
|
StringRef RawFuncName;
|
2014-03-21 06:49:50 +08:00
|
|
|
llvm::GlobalValue::LinkageTypes VarLinkage;
|
2014-01-07 06:27:43 +08:00
|
|
|
|
|
|
|
unsigned NumRegionCounters;
|
2014-03-19 05:58:06 +08:00
|
|
|
uint64_t FunctionHash;
|
2014-01-07 06:27:43 +08:00
|
|
|
llvm::GlobalVariable *RegionCounters;
|
2014-03-27 03:26:05 +08:00
|
|
|
std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap;
|
|
|
|
std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap;
|
|
|
|
std::unique_ptr<std::vector<uint64_t>> RegionCounts;
|
2014-01-07 06:27:43 +08:00
|
|
|
uint64_t CurrentRegionCount;
|
|
|
|
|
|
|
|
public:
|
|
|
|
CodeGenPGO(CodeGenModule &CGM)
|
2014-05-21 13:09:00 +08:00
|
|
|
: CGM(CGM), NumRegionCounters(0), FunctionHash(0),
|
|
|
|
RegionCounters(nullptr), CurrentRegionCount(0) {}
|
2014-01-07 06:27:43 +08:00
|
|
|
|
|
|
|
/// Whether or not we have PGO region data for the current function. This is
|
|
|
|
/// false both when we have no data at all and when our data has been
|
|
|
|
/// discarded.
|
2014-05-21 13:09:00 +08:00
|
|
|
bool haveRegionCounts() const { return RegionCounts != nullptr; }
|
2014-01-07 06:27:43 +08:00
|
|
|
|
2014-03-06 12:55:41 +08:00
|
|
|
/// Get the string used to identify this function in the profile data.
|
|
|
|
/// For functions with local linkage, this includes the main file name.
|
2014-03-18 05:18:30 +08:00
|
|
|
StringRef getFuncName() const { return StringRef(*PrefixedFuncName); }
|
|
|
|
std::string getFuncVarName(StringRef VarName) const {
|
2014-03-21 04:00:41 +08:00
|
|
|
return ("__llvm_profile_" + VarName + "_" + RawFuncName).str();
|
2014-03-18 05:18:30 +08:00
|
|
|
}
|
2014-03-06 12:55:41 +08:00
|
|
|
|
2014-01-07 06:27:43 +08:00
|
|
|
/// Return the counter value of the current region.
|
|
|
|
uint64_t getCurrentRegionCount() const { return CurrentRegionCount; }
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
|
2014-01-07 06:27:43 +08:00
|
|
|
/// Set the counter value for the current region. This is used to keep track
|
|
|
|
/// of changes to the most recent counter from control flow and non-local
|
|
|
|
/// exits.
|
|
|
|
void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; }
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
|
2014-01-14 05:24:18 +08:00
|
|
|
/// Indicate that the current region is never reached, and thus should have a
|
|
|
|
/// counter value of zero. This is important so that subsequent regions can
|
|
|
|
/// correctly track their parent counts.
|
|
|
|
void setCurrentRegionUnreachable() { setCurrentRegionCount(0); }
|
2014-01-07 06:27:43 +08:00
|
|
|
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
/// Check if an execution count is known for a given statement. If so, return
|
|
|
|
/// true and put the value in Count; else return false.
|
|
|
|
bool getStmtCount(const Stmt *S, uint64_t &Count) {
|
|
|
|
if (!StmtCountMap)
|
|
|
|
return false;
|
|
|
|
llvm::DenseMap<const Stmt*, uint64_t>::const_iterator
|
|
|
|
I = StmtCountMap->find(S);
|
|
|
|
if (I == StmtCountMap->end())
|
|
|
|
return false;
|
|
|
|
Count = I->second;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// If the execution count for the current statement is known, record that
|
|
|
|
/// as the current count.
|
|
|
|
void setCurrentStmt(const Stmt *S) {
|
|
|
|
uint64_t Count;
|
|
|
|
if (getStmtCount(S, Count))
|
|
|
|
setCurrentRegionCount(Count);
|
|
|
|
}
|
|
|
|
|
2014-01-07 06:27:43 +08:00
|
|
|
/// Calculate branch weights appropriate for PGO data
|
|
|
|
llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount);
|
|
|
|
llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights);
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
llvm::MDNode *createLoopWeights(const Stmt *Cond, RegionCounter &Cnt);
|
2014-01-07 06:27:43 +08:00
|
|
|
|
|
|
|
/// Assign counters to regions and configure them for PGO of a given
|
|
|
|
/// function. Does nothing if instrumentation is not enabled and either
|
|
|
|
/// generates global variables or associates PGO data with each of the
|
|
|
|
/// counters depending on whether we are generating or using instrumentation.
|
2014-03-06 12:55:41 +08:00
|
|
|
void assignRegionCounters(const Decl *D, llvm::Function *Fn);
|
2014-03-18 05:18:30 +08:00
|
|
|
/// Emit static data structures for instrumentation data.
|
|
|
|
void emitInstrumentationData();
|
2014-01-07 06:27:43 +08:00
|
|
|
/// Clean up region counter state. Must be called if assignRegionCounters is
|
|
|
|
/// used.
|
|
|
|
void destroyRegionCounters();
|
2014-03-18 05:18:30 +08:00
|
|
|
/// Emit static initialization code, if any.
|
2014-01-07 06:27:43 +08:00
|
|
|
static llvm::Function *emitInitialization(CodeGenModule &CGM);
|
|
|
|
|
|
|
|
private:
|
2014-03-06 12:55:41 +08:00
|
|
|
void setFuncName(llvm::Function *Fn);
|
2014-01-07 06:27:43 +08:00
|
|
|
void mapRegionCounters(const Decl *D);
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
void computeRegionCounts(const Decl *D);
|
2014-04-19 05:52:00 +08:00
|
|
|
void applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
|
|
|
|
llvm::Function *Fn);
|
|
|
|
void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader);
|
2014-01-07 06:27:43 +08:00
|
|
|
void emitCounterVariables();
|
2014-03-18 05:18:30 +08:00
|
|
|
llvm::GlobalVariable *buildDataVar();
|
2014-01-07 06:27:43 +08:00
|
|
|
|
|
|
|
/// Emit code to increment the counter at the given index
|
|
|
|
void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter);
|
|
|
|
|
|
|
|
/// Return the region counter for the given statement. This should only be
|
|
|
|
/// called on statements that have a dedicated counter.
|
|
|
|
unsigned getRegionCounter(const Stmt *S) {
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!RegionCounterMap)
|
2014-01-07 06:27:43 +08:00
|
|
|
return 0;
|
|
|
|
return (*RegionCounterMap)[S];
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the region count for the counter at the given index.
|
|
|
|
uint64_t getRegionCount(unsigned Counter) {
|
|
|
|
if (!haveRegionCounts())
|
|
|
|
return 0;
|
|
|
|
return (*RegionCounts)[Counter];
|
|
|
|
}
|
|
|
|
|
|
|
|
friend class RegionCounter;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// A counter for a particular region. This is the primary interface through
|
|
|
|
/// which clients manage PGO counters and their values.
|
|
|
|
class RegionCounter {
|
|
|
|
CodeGenPGO *PGO;
|
|
|
|
unsigned Counter;
|
|
|
|
uint64_t Count;
|
|
|
|
uint64_t ParentCount;
|
|
|
|
uint64_t RegionCount;
|
|
|
|
int64_t Adjust;
|
|
|
|
|
|
|
|
/// Build a counter directly from a counter index. Count is the profile value
/// stored for that index and ParentCount is the count that is current in
/// \p PGO at construction time. RegionCount starts at zero so that
/// adjustForControlFlow() never reads an indeterminate value when no
/// begin*Region() call has run yet.
RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex)
    : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)),
      ParentCount(PGO.getCurrentRegionCount()), RegionCount(0), Adjust(0) {}
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// Build the counter associated with statement \p S. The counter index is
/// obtained from \p PGO, Count is the profile value stored for that index,
/// and ParentCount is the count that is current in \p PGO at construction
/// time. RegionCount starts at zero so that adjustForControlFlow() never
/// reads an indeterminate value when no begin*Region() call has run yet.
RegionCounter(CodeGenPGO &PGO, const Stmt *S)
    : PGO(&PGO), Counter(PGO.getRegionCounter(S)),
      Count(PGO.getRegionCount(Counter)),
      ParentCount(PGO.getCurrentRegionCount()), RegionCount(0), Adjust(0) {}
|
|
|
|
|
|
|
|
/// Get the value of the counter. In most cases this is the number of times
|
|
|
|
/// the region of the counter was entered, but for switch labels it's the
|
|
|
|
/// number of direct jumps to that label.
|
|
|
|
uint64_t getCount() const {
  // Value captured at construction; adjustments are not applied here.
  return Count;
}
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
|
2014-01-07 06:27:43 +08:00
|
|
|
/// Get the value of the counter with adjustments applied. Adjustments occur
|
2014-02-18 03:21:03 +08:00
|
|
|
/// when control enters or leaves the region abnormally; i.e., if there is a
|
2014-01-07 06:27:43 +08:00
|
|
|
/// jump to a label within the region, or if the function can return from
|
|
|
|
/// within the region. The adjusted count, then, is the value of the counter
|
|
|
|
/// at the end of the region.
|
|
|
|
uint64_t getAdjustedCount() const {
|
|
|
|
return Count + Adjust;
|
|
|
|
}
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
|
2014-02-18 03:21:03 +08:00
|
|
|
/// Get the value of the counter in this region's parent, i.e., the region
|
|
|
|
/// that was active when this region began. This is useful for deriving
|
|
|
|
/// counts in implicitly counted regions, like the false case of a condition
|
|
|
|
/// or the normal exits of a loop.
|
2014-01-07 06:27:43 +08:00
|
|
|
uint64_t getParentCount() const {
  // Snapshot of the PGO state's current count taken at construction.
  return ParentCount;
}
|
|
|
|
|
|
|
|
/// Activate the counter by emitting an increment and starting to track
|
|
|
|
/// adjustments. If AddIncomingFallThrough is true, the current region count
|
|
|
|
/// will be added to the counter for the purposes of tracking the region.
|
|
|
|
void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) {
  // Update the tracked counts first, then emit the increment for this
  // counter through the owning PGO state.
  this->beginRegion(AddIncomingFallThrough);
  PGO->emitCounterIncrement(Builder, Counter);
}
|
|
|
|
void beginRegion(bool AddIncomingFallThrough=false) {
|
2014-01-07 06:27:43 +08:00
|
|
|
RegionCount = Count;
|
|
|
|
if (AddIncomingFallThrough)
|
|
|
|
RegionCount += PGO->getCurrentRegionCount();
|
|
|
|
PGO->setCurrentRegionCount(RegionCount);
|
|
|
|
}
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
|
2014-01-07 06:27:43 +08:00
|
|
|
/// For counters on boolean branches, begins tracking adjustments for the
|
|
|
|
/// uncounted path.
|
|
|
|
void beginElseRegion() {
  // The uncounted path takes whatever part of the parent count the counted
  // path did not; the subtraction is plain unsigned arithmetic.
  const uint64_t ElseCount = ParentCount - Count;
  RegionCount = ElseCount;
  PGO->setCurrentRegionCount(ElseCount);
}
|
|
|
|
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
/// Reset the current region count to the given value. The new count is
/// stored in this counter's RegionCount and then forwarded to the PGO object
/// through its own setCurrentRegionCount().
|
|
|
|
void setCurrentRegionCount(uint64_t CurrentCount) {
|
|
|
|
// Record the new count locally first.
RegionCount = CurrentCount;
|
|
|
|
// Pass the freshly stored value on to the PGO object.
PGO->setCurrentRegionCount(RegionCount);
|
|
|
|
}
|
|
|
|
|
2014-01-14 05:24:22 +08:00
|
|
|
/// Adjust for non-local control flow after emitting a subexpression or
|
|
|
|
/// substatement. This must be called to account for constructs such as gotos,
|
|
|
|
/// labels, and returns, so that we can ensure that our region's count is
|
|
|
|
/// correct in the code that follows.
///
/// The difference between the PGO object's current region count and the
/// RegionCount recorded here is accumulated into Adjust, after which
/// RegionCount is refreshed from the PGO object.
|
|
|
|
void adjustForControlFlow() {
|
2014-01-07 06:27:43 +08:00
|
|
|
// Accumulate how far the PGO object's current count has moved away from the
// count this counter last recorded.
Adjust += PGO->getCurrentRegionCount() - RegionCount;
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
// Reset the region count in case this is called again later, so that a
// repeated call only accumulates the change since this point.
|
|
|
|
RegionCount = PGO->getCurrentRegionCount();
|
2014-01-07 06:27:43 +08:00
|
|
|
}
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and makes them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
|
|
|
|
/// Commit all adjustments to the current region. If the region is a loop,
|
|
|
|
/// the LoopAdjust value should be the count of all the breaks and continues
|
|
|
|
/// from the loop, to compensate for those counts being deducted from the
|
|
|
|
/// adjustments for the body of the loop.
///
/// The PGO object's current region count is set to the sum of ParentCount,
/// Adjust, and LoopAdjust.
|
|
|
|
void applyAdjustmentsToRegion(uint64_t LoopAdjust) {
|
|
|
|
// Combine the parent count with the accumulated adjustment and the
// caller-supplied loop compensation, and hand the sum to the PGO object.
PGO->setCurrentRegionCount(ParentCount + Adjust + LoopAdjust);
|
2014-01-07 06:27:43 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} // end namespace CodeGen
|
|
|
|
} // end namespace clang
|
|
|
|
|
|
|
|
#endif
|