Add -debug-info-for-profiling to emit more debug info for sample pgo profile collection

Summary:
SamplePGO binaries built with -gmlt to collect profile. The current -gmlt debug info is limited, and we need some additional info:

* start line of all subprograms
* linkage name of all subprograms
* standalone subprograms (functions that has neither inlined nor been inlined)

This patch adds these information to the -gmlt binary. The impact on speccpu2006 binary size (size increase comparing with -g0 binary, also includes data for -g binary, which does not change with this patch):

               -gmlt(orig) -gmlt(patched) -g
433.milc       4.68%       5.40%          19.73%
444.namd       8.45%       8.93%          45.99%
447.dealII     97.43%      115.21%        374.89%
450.soplex     27.75%      31.88%         126.04%
453.povray     21.81%      26.16%         92.03%
470.lbm        0.60%       0.67%          1.96%
482.sphinx3    5.77%       6.47%          26.17%
400.perlbench  17.81%      19.43%         73.08%
401.bzip2      3.73%       3.92%          12.18%
403.gcc        31.75%      34.48%         122.75%
429.mcf        0.78%       0.88%          3.89%
445.gobmk      6.08%       7.92%          42.27%
456.hmmer      10.36%      11.25%         35.23%
458.sjeng      5.08%       5.42%          14.36%
462.libquantum 1.71%       1.96%          6.36%
464.h264ref    15.61%      16.56%         43.92%
471.omnetpp    11.93%      15.84%         60.09%
473.astar      3.11%       3.69%          14.18%
483.xalancbmk  56.29%      81.63%         353.22%
geomean        15.60%      18.30%         57.81%

Debug info size change for -gmlt binary with this patch:

433.milc       13.46%
444.namd       5.35%
447.dealII     18.21%
450.soplex     14.68%
453.povray     19.65%
470.lbm        6.03%
482.sphinx3    11.21%
400.perlbench  8.91%
401.bzip2      4.41%
403.gcc        8.56%
429.mcf        8.24%
445.gobmk      29.47%
456.hmmer      8.19%
458.sjeng      6.05%
462.libquantum 11.23%
464.h264ref    5.93%
471.omnetpp    31.89%
473.astar      16.20%
483.xalancbmk  44.62%
geomean        16.83%

Reviewers: davidxl, echristo, dblaikie

Reviewed By: echristo, dblaikie

Subscribers: aprantl, probinson, llvm-commits, mehdi_amini

Differential Revision: https://reviews.llvm.org/D25434

llvm-svn: 292457
This commit is contained in:
Dehao Chen 2017-01-19 00:44:11 +00:00
parent 230867e583
commit 1ce8d6ca59
7 changed files with 35 additions and 11 deletions

View File

@ -108,7 +108,7 @@ namespace llvm {
DisableIntegratedAS(false), CompressDebugSections(false),
RelaxELFRelocations(false), FunctionSections(false),
DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
EmulatedTLS(false), EnableIPRA(false),
EmulatedTLS(false), EnableIPRA(false), DebugInfoForProfiling(false),
FloatABIType(FloatABI::Default),
AllowFPOpFusion(FPOpFusion::Standard),
ThreadModel(ThreadModel::POSIX),
@ -225,6 +225,9 @@ namespace llvm {
/// This flag enables InterProcedural Register Allocation (IPRA).
unsigned EnableIPRA : 1;
/// This flag enables emitting extra debug info for sample profiling.
unsigned DebugInfoForProfiling : 1;
/// FloatABIType - This setting is set by -float-abi=xxx option is specfied
/// on the command line. This setting may either be Default, Soft, or Hard.
/// Default selects the target's default behavior. Soft selects the ABI for
@ -299,7 +302,8 @@ inline bool operator==(const TargetOptions &LHS,
ARE_EQUAL(FPDenormalMode) &&
ARE_EQUAL(ExceptionModel) &&
ARE_EQUAL(MCOptions) &&
ARE_EQUAL(EnableIPRA);
ARE_EQUAL(EnableIPRA) &&
ARE_EQUAL(DebugInfoForProfiling);
#undef ARE_EQUAL
}

View File

@ -1211,8 +1211,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it.
if (TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
// subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
// is still needed as we need its source location.
if (!Asm->TM.Options.DebugInfoForProfiling &&
TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
assert(DbgValues.empty());

View File

@ -1180,8 +1180,12 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
}
void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool Minimal) {
if (!Minimal)
bool SkipSPAttributes) {
// If -fdebug-info-for-profiling is enabled, need to emit the subprogram
// and its source location.
bool SkipSPSourceLocation = SkipSPAttributes &&
!Asm->TM.Options.DebugInfoForProfiling;
if (!SkipSPSourceLocation)
if (applySubprogramDefinitionAttributes(SP, SPDie))
return;
@ -1189,11 +1193,12 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (!SP->getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP->getName());
// Skip the rest of the attributes under -gmlt to save space.
if (Minimal)
return;
if (!SkipSPSourceLocation)
addSourceLine(SPDie, SP);
addSourceLine(SPDie, SP);
// Skip the rest of the attributes under -gmlt to save space.
if (SkipSPAttributes)
return;
// Add the prototype if we have a prototype and we have a C like
// language.

View File

@ -256,7 +256,7 @@ public:
DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false);
void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool Minimal = false);
bool SkipSPAttributes = false);
/// Find existing DIE or create new DIE for the given type.
DIE *getOrCreateTypeDIE(const MDNode *N);

View File

@ -35,6 +35,10 @@ cl::opt<bool> EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
cl::desc("Enable interprocedural register allocation "
"to reduce load/store at procedure calls."));
cl::opt<bool> DebugInfoForProfiling(
"debug-info-for-profiling", cl::init(false), cl::Hidden,
cl::desc("Emit extra debug info to make sample profile more accurate."));
//---------------------------------------------------------------------------
// TargetMachine Class
//
@ -47,6 +51,8 @@ TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
RequireStructuredCFG(false), DefaultOptions(Options), Options(Options) {
if (EnableIPRA.getNumOccurrences())
this->Options.EnableIPRA = EnableIPRA;
if (DebugInfoForProfiling.getNumOccurrences())
this->Options.DebugInfoForProfiling = DebugInfoForProfiling;
}
TargetMachine::~TargetMachine() {

View File

@ -1,5 +1,6 @@
; REQUIRES: object-emission
; RUN: %llc_dwarf -O0 -filetype=obj < %S/../Inputs/gmlt.ll | llvm-dwarfdump - | FileCheck %S/../Inputs/gmlt.ll
; RUN: %llc_dwarf -O0 -filetype=obj -debug-info-for-profiling < %S/../Inputs/gmlt.ll | llvm-dwarfdump - | FileCheck %S/../Inputs/gmlt.ll --check-prefixes=PROFILING
; There's a darwin specific test in X86/gmlt, so it's okay to XFAIL this here.
; XFAIL: darwin

View File

@ -76,6 +76,12 @@
; CHECK-NOT: {{DW_TAG|DW_AT}}
; CHECK: NULL
; PROFILING: DW_TAG_subprogram
; PROFILING: DW_AT_name {{.*}} "f1"
; With -debug-info-for-profiling, we need to emit decl_file a-nd decl_line
; of the subprogram.
; PROFILING: DW_AT_decl_file
; PROFILING: DW_AT_decl_line
; CHECK: .debug_ranges contents: