2014-01-07 06:27:43 +08:00
|
|
|
//===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-01-07 06:27:43 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// Instrumentation-based profile-guided optimization
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-08-14 00:25:19 +08:00
|
|
|
#ifndef LLVM_CLANG_LIB_CODEGEN_CODEGENPGO_H
|
|
|
|
#define LLVM_CLANG_LIB_CODEGEN_CODEGENPGO_H
|
2014-01-07 06:27:43 +08:00
|
|
|
|
|
|
|
#include "CGBuilder.h"
|
|
|
|
#include "CodeGenModule.h"
|
|
|
|
#include "CodeGenTypes.h"
|
2016-01-24 06:50:44 +08:00
|
|
|
#include "llvm/ProfileData/InstrProfReader.h"
|
2016-01-24 08:56:19 +08:00
|
|
|
#include <array>
|
2014-03-09 19:36:40 +08:00
|
|
|
#include <memory>
|
2014-01-07 06:27:43 +08:00
|
|
|
|
|
|
|
namespace clang {
|
|
|
|
namespace CodeGen {
|
|
|
|
|
2015-04-24 07:06:47 +08:00
|
|
|
/// Per-function PGO state.
|
2014-01-07 06:27:43 +08:00
|
|
|
class CodeGenPGO {
|
|
|
|
private:
|
|
|
|
CodeGenModule &CGM;
|
2014-12-03 07:15:30 +08:00
|
|
|
std::string FuncName;
|
2014-12-09 03:04:51 +08:00
|
|
|
llvm::GlobalVariable *FuncNameVar;
|
2014-01-07 06:27:43 +08:00
|
|
|
|
2016-01-24 08:56:19 +08:00
|
|
|
std::array <unsigned, llvm::IPVK_Last + 1> NumValueSites;
|
2014-01-07 06:27:43 +08:00
|
|
|
unsigned NumRegionCounters;
|
2014-03-19 05:58:06 +08:00
|
|
|
uint64_t FunctionHash;
|
2014-03-27 03:26:05 +08:00
|
|
|
std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap;
|
|
|
|
std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap;
|
2016-01-24 06:50:44 +08:00
|
|
|
std::unique_ptr<llvm::InstrProfRecord> ProfRecord;
|
2014-12-03 06:38:52 +08:00
|
|
|
std::vector<uint64_t> RegionCounts;
|
2014-01-07 06:27:43 +08:00
|
|
|
uint64_t CurrentRegionCount;
|
|
|
|
|
|
|
|
public:
|
2020-03-02 21:58:21 +08:00
|
|
|
CodeGenPGO(CodeGenModule &CGModule)
|
|
|
|
: CGM(CGModule), FuncNameVar(nullptr), NumValueSites({{0}}),
|
2019-10-03 05:05:21 +08:00
|
|
|
NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0) {}
|
2014-01-07 06:27:43 +08:00
|
|
|
|
|
|
|
/// Whether or not we have PGO region data for the current function. This is
|
|
|
|
/// false both when we have no data at all and when our data has been
|
|
|
|
/// discarded.
|
2014-12-03 06:38:52 +08:00
|
|
|
bool haveRegionCounts() const { return !RegionCounts.empty(); }
|
2014-01-07 06:27:43 +08:00
|
|
|
|
|
|
|
/// Return the counter value of the current region.
|
|
|
|
uint64_t getCurrentRegionCount() const { return CurrentRegionCount; }
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
|
2014-01-07 06:27:43 +08:00
|
|
|
/// Set the counter value for the current region. This is used to keep track
|
|
|
|
/// of changes to the most recent counter from control flow and non-local
|
|
|
|
/// exits.
|
|
|
|
void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; }
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
|
|
|
|
/// Check if an execution count is known for a given statement. If so, return
|
|
|
|
/// true and put the value in Count; else return false.
|
2020-10-18 20:11:41 +08:00
|
|
|
Optional<uint64_t> getStmtCount(const Stmt *S) const {
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
if (!StmtCountMap)
|
2015-04-13 20:23:19 +08:00
|
|
|
return None;
|
|
|
|
auto I = StmtCountMap->find(S);
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
if (I == StmtCountMap->end())
|
2015-04-13 20:23:19 +08:00
|
|
|
return None;
|
|
|
|
return I->second;
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/// If the execution count for the current statement is known, record that
|
|
|
|
/// as the current count.
|
|
|
|
void setCurrentStmt(const Stmt *S) {
|
2015-04-13 20:23:19 +08:00
|
|
|
if (auto Count = getStmtCount(S))
|
|
|
|
setCurrentRegionCount(*Count);
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
}
|
|
|
|
|
2014-01-07 06:27:43 +08:00
|
|
|
/// Assign counters to regions and configure them for PGO of a given
|
|
|
|
/// function. Does nothing if instrumentation is not enabled and either
|
|
|
|
/// generates global variables or associates PGO data with each of the
|
|
|
|
/// counters depending on whether we are generating or using instrumentation.
|
2015-12-06 22:32:39 +08:00
|
|
|
void assignRegionCounters(GlobalDecl GD, llvm::Function *Fn);
|
2014-08-05 02:41:51 +08:00
|
|
|
/// Emit a coverage mapping range with a counter zero
|
|
|
|
/// for an unused declaration.
|
|
|
|
void emitEmptyCounterMapping(const Decl *D, StringRef FuncName,
|
|
|
|
llvm::GlobalValue::LinkageTypes Linkage);
|
2016-01-24 06:50:44 +08:00
|
|
|
// Insert instrumentation or attach profile metadata at value sites
|
|
|
|
void valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
|
|
|
|
llvm::Instruction *ValueSite, llvm::Value *ValuePtr);
|
[PGO] Don't reference functions unless value profiling is enabled
This reduces the size of chrome.dll.pdb built with optimizations,
coverage, and line table info from 4,690,210,816 to 2,181,128,192, which
makes it possible to fit under the 4GB limit.
This change can greatly reduce binary size in coverage builds, which do
not need value profiling. IR PGO builds are unaffected. There is a minor
behavior change for frontend PGO.
PGO and coverage both use InstrProfiling to create profile data with
counters. PGO records the address of each function in the __profd_
global. It is used later to map runtime function pointer values back to
source-level function names. Coverage does not appear to use this
information.
Recording the address of every function with code coverage drastically
increases code size. Consider this program:
void foo();
void bar();
inline void inlineMe(int x) {
if (x > 0)
foo();
else
bar();
}
int getVal();
int main() { inlineMe(getVal()); }
With code coverage, the InstrProfiling pass runs before inlining, and it
captures the address of inlineMe in the __profd_ global. This greatly
increases code size, because now the compiler can no longer delete
trivial code.
One downside to this approach is that users of frontend PGO must apply
the -mllvm -enable-value-profiling flag globally in TUs that enable PGO.
Otherwise, some inline virtual method addresses may not be recorded and
will not be able to be promoted. My assumption is that this mllvm flag
is not popular, and most frontend PGO users don't enable it.
Differential Revision: https://reviews.llvm.org/D102818
2021-05-14 05:43:22 +08:00
|
|
|
|
|
|
|
// Set a module flag indicating if value profiling is enabled.
|
|
|
|
void setValueProfilingFlag(llvm::Module &M);
|
|
|
|
|
2014-01-07 06:27:43 +08:00
|
|
|
private:
|
2014-03-06 12:55:41 +08:00
|
|
|
void setFuncName(llvm::Function *Fn);
|
2014-08-05 02:41:51 +08:00
|
|
|
void setFuncName(StringRef Name, llvm::GlobalValue::LinkageTypes Linkage);
|
2014-01-07 06:27:43 +08:00
|
|
|
void mapRegionCounters(const Decl *D);
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
void computeRegionCounts(const Decl *D);
|
2014-04-19 05:52:00 +08:00
|
|
|
void applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
|
|
|
|
llvm::Function *Fn);
|
2014-06-26 09:45:07 +08:00
|
|
|
void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
|
|
|
|
bool IsInMainFile);
|
2016-07-12 06:57:44 +08:00
|
|
|
bool skipRegionMappingForDecl(const Decl *D);
|
2014-08-05 02:41:51 +08:00
|
|
|
void emitCounterRegionMapping(const Decl *D);
|
2014-01-07 06:27:43 +08:00
|
|
|
|
2015-04-24 07:06:47 +08:00
|
|
|
public:
|
2017-02-25 14:35:45 +08:00
|
|
|
void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
|
|
|
|
llvm::Value *StepV);
|
2014-01-07 06:27:43 +08:00
|
|
|
|
2015-04-24 07:06:47 +08:00
|
|
|
/// Return the region count for the counter at the given index.
|
|
|
|
uint64_t getRegionCount(const Stmt *S) {
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!RegionCounterMap)
|
2014-01-07 06:27:43 +08:00
|
|
|
return 0;
|
|
|
|
if (!haveRegionCounts())
|
|
|
|
return 0;
|
2015-04-24 07:06:47 +08:00
|
|
|
return RegionCounts[(*RegionCounterMap)[S]];
|
2014-01-07 06:27:43 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} // end namespace CodeGen
|
|
|
|
} // end namespace clang
|
|
|
|
|
|
|
|
#endif
|