forked from OSchip/llvm-project
Add an option to save the backend-produced YAML optimization record to a file
The backend now has the capability to save information from optimizations, the same information that can be used to generate optimization diagnostics but in machine-consumable form, into an output file. This can be enabled when using opt (see r282539), and this change enables it when using clang. The idea is that other tools will be able to consume these files and, perhaps in combination with the original source code, produce various kinds of optimization reports for users (and for compiler developers). We now have at least two tools that can consume these files: tools/llvm-opt-report and utils/opt-viewer. Using the flag -fsave-optimization-record will cause the YAML file to be generated; the file name will be based on the output file name (if we're using -c or -S and have an output name), or the input file name. When we're using CUDA, or some other offloading mechanism, separate files are generated for each backend target. The output file name can be specified by the user using -foptimization-record-file=filename. Differential Revision: https://reviews.llvm.org/D25225 llvm-svn: 283834
This commit is contained in:
parent
4043ca7394
commit
8f96e82cb8
|
@ -507,6 +507,9 @@ def arcmt_modify : Flag<["-"], "arcmt-modify">,
|
|||
def arcmt_migrate : Flag<["-"], "arcmt-migrate">,
|
||||
HelpText<"Apply modifications and produces temporary files that conform to ARC">;
|
||||
|
||||
def opt_record_file : Separate<["-"], "opt-record-file">,
|
||||
HelpText<"File name to use for YAML optimization record output">;
|
||||
|
||||
def print_stats : Flag<["-"], "print-stats">,
|
||||
HelpText<"Print performance metrics and statistics">;
|
||||
def stats_file : Joined<["-"], "stats-file=">,
|
||||
|
|
|
@ -1192,6 +1192,15 @@ def ftemplate_backtrace_limit_EQ : Joined<["-"], "ftemplate-backtrace-limit=">,
|
|||
Group<f_Group>;
|
||||
def foperator_arrow_depth_EQ : Joined<["-"], "foperator-arrow-depth=">,
|
||||
Group<f_Group>;
|
||||
|
||||
def fsave_optimization_record : Flag<["-"], "fsave-optimization-record">,
|
||||
Group<f_Group>, HelpText<"Generate a YAML optimization record file">;
|
||||
def fno_save_optimization_record : Flag<["-"], "fno-save-optimization-record">,
|
||||
Group<f_Group>, Flags<[NoArgumentUnused]>;
|
||||
def foptimization_record_file_EQ : Joined<["-"], "foptimization-record-file=">,
|
||||
Group<f_Group>,
|
||||
HelpText<"Specify the file name of any generated YAML optimization record">;
|
||||
|
||||
def ftest_coverage : Flag<["-"], "ftest-coverage">, Group<f_Group>;
|
||||
def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>,
|
||||
HelpText<"Enable the loop vectorization passes">;
|
||||
|
|
|
@ -181,6 +181,10 @@ public:
|
|||
/// object file.
|
||||
std::vector<std::string> CudaGpuBinaryFileNames;
|
||||
|
||||
/// The name of the file to which the backend should save YAML optimization
|
||||
/// records.
|
||||
std::string OptRecordFile;
|
||||
|
||||
/// Regular expression to select optimizations for which we should enable
|
||||
/// optimization remarks. Transformation passes whose name matches this
|
||||
/// expression (and support this feature), will emit a diagnostic
|
||||
|
|
|
@ -33,6 +33,8 @@
|
|||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/SourceMgr.h"
|
||||
#include "llvm/Support/Timer.h"
|
||||
#include "llvm/Support/ToolOutputFile.h"
|
||||
#include "llvm/Support/YAMLTraits.h"
|
||||
#include <memory>
|
||||
using namespace clang;
|
||||
using namespace llvm;
|
||||
|
@ -181,6 +183,24 @@ namespace clang {
|
|||
Ctx.setDiagnosticHandler(DiagnosticHandler, this);
|
||||
Ctx.setDiagnosticHotnessRequested(CodeGenOpts.DiagnosticsWithHotness);
|
||||
|
||||
std::unique_ptr<llvm::tool_output_file> OptRecordFile;
|
||||
if (!CodeGenOpts.OptRecordFile.empty()) {
|
||||
std::error_code EC;
|
||||
OptRecordFile =
|
||||
llvm::make_unique<llvm::tool_output_file>(CodeGenOpts.OptRecordFile,
|
||||
EC, sys::fs::F_None);
|
||||
if (EC) {
|
||||
Diags.Report(diag::err_cannot_open_file) <<
|
||||
CodeGenOpts.OptRecordFile << EC.message();
|
||||
return;
|
||||
}
|
||||
|
||||
Ctx.setDiagnosticsOutputFile(new yaml::Output(OptRecordFile->os()));
|
||||
|
||||
if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
|
||||
Ctx.setDiagnosticHotnessRequested(true);
|
||||
}
|
||||
|
||||
// Link LinkModule into this module if present, preserving its validity.
|
||||
for (auto &I : LinkModules) {
|
||||
unsigned LinkFlags = I.first;
|
||||
|
@ -198,6 +218,9 @@ namespace clang {
|
|||
Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext);
|
||||
|
||||
Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext);
|
||||
|
||||
if (OptRecordFile)
|
||||
OptRecordFile->keep();
|
||||
}
|
||||
|
||||
void HandleTagDeclDefinition(TagDecl *D) override {
|
||||
|
|
|
@ -6080,6 +6080,39 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
|||
CmdArgs.push_back("-fno-math-builtin");
|
||||
}
|
||||
|
||||
if (Args.hasFlag(options::OPT_fsave_optimization_record,
|
||||
options::OPT_fno_save_optimization_record, false)) {
|
||||
CmdArgs.push_back("-opt-record-file");
|
||||
|
||||
const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ);
|
||||
if (A) {
|
||||
CmdArgs.push_back(A->getValue());
|
||||
} else {
|
||||
SmallString<128> F;
|
||||
if (Output.isFilename() && (Args.hasArg(options::OPT_c) ||
|
||||
Args.hasArg(options::OPT_S))) {
|
||||
F = Output.getFilename();
|
||||
} else {
|
||||
// Use the input file name (its stem, without directory or extension).
|
||||
F = llvm::sys::path::stem(Input.getBaseInput());
|
||||
|
||||
// If we're compiling for an offload architecture (i.e. a CUDA device),
|
||||
// we need to make the file name for the device compilation different
|
||||
// from the host compilation.
|
||||
if (!JA.isDeviceOffloading(Action::OFK_None) &&
|
||||
!JA.isDeviceOffloading(Action::OFK_Host)) {
|
||||
llvm::sys::path::replace_extension(F, "");
|
||||
F += JA.getOffloadingFileNamePrefix(Triple.normalize());
|
||||
F += "-";
|
||||
F += JA.getOffloadingArch();
|
||||
}
|
||||
}
|
||||
|
||||
llvm::sys::path::replace_extension(F, "opt.yaml");
|
||||
CmdArgs.push_back(Args.MakeArgString(F));
|
||||
}
|
||||
}
|
||||
|
||||
// Default to -fno-builtin-str{cat,cpy} on Darwin for ARM.
|
||||
//
|
||||
// FIXME: Now that PR4941 has been fixed this can be enabled.
|
||||
|
|
|
@ -826,6 +826,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
|
|||
Opts.LinkerOptions = Args.getAllArgValues(OPT_linker_option);
|
||||
bool NeedLocTracking = false;
|
||||
|
||||
Opts.OptRecordFile = Args.getLastArgValue(OPT_opt_record_file);
|
||||
if (!Opts.OptRecordFile.empty())
|
||||
NeedLocTracking = true;
|
||||
|
||||
if (Arg *A = Args.getLastArg(OPT_Rpass_EQ)) {
|
||||
Opts.OptimizationRemarkPattern =
|
||||
GenerateOptimizationRemarkRegex(Diags, Args, A);
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
foo
|
||||
# Func Hash:
|
||||
0
|
||||
# Num Counters:
|
||||
1
|
||||
# Counter Values:
|
||||
30
|
||||
|
||||
bar
|
||||
# Func Hash:
|
||||
0
|
||||
# Num Counters:
|
||||
1
|
||||
# Counter Values:
|
||||
30
|
||||
|
||||
Test
|
||||
# Func Hash:
|
||||
269
|
||||
# Num Counters:
|
||||
3
|
||||
# Counter Values:
|
||||
1
|
||||
30
|
||||
15
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 %s -o %t -dwarf-column-info -opt-record-file %t.yaml -emit-obj
|
||||
// RUN: cat %t.yaml | FileCheck %s
|
||||
// RUN: llvm-profdata merge %S/Inputs/opt-record.proftext -o %t.profdata
|
||||
// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -fprofile-instrument-use-path=%t.profdata %s -o %t -dwarf-column-info -opt-record-file %t.yaml -emit-obj
|
||||
// RUN: cat %t.yaml | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PGO %s
|
||||
// REQUIRES: x86-registered-target
|
||||
|
||||
void bar();
|
||||
void foo() { bar(); }
|
||||
|
||||
void Test(int *res, int *c, int *d, int *p, int n) {
|
||||
int i;
|
||||
|
||||
#pragma clang loop vectorize(assume_safety)
|
||||
for (i = 0; i < 1600; i++) {
|
||||
res[i] = (p[i] == 0) ? res[i] : res[i] + d[i];
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: --- !Missed
|
||||
// CHECK: Pass: inline
|
||||
// CHECK: Name: NoDefinition
|
||||
// CHECK: DebugLoc:
|
||||
// CHECK: Function: foo
|
||||
// CHECK-PGO: Hotness:
|
||||
|
||||
// CHECK: --- !Passed
|
||||
// CHECK: Pass: loop-vectorize
|
||||
// CHECK: Name: Vectorized
|
||||
// CHECK: DebugLoc:
|
||||
// CHECK: Function: Test
|
||||
// CHECK-PGO: Hotness:
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
// RUN: %clang -### -S -o FOO -fsave-optimization-record %s 2>&1 | FileCheck %s
|
||||
// RUN: %clang -### -c -o FOO -fsave-optimization-record %s 2>&1 | FileCheck %s
|
||||
// RUN: %clang -### -c -fsave-optimization-record %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O
|
||||
// RUN: %clang -### -fsave-optimization-record %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O
|
||||
// RUN: %clang -### -S -fsave-optimization-record -x cuda -nocudainc -nocudalib %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
|
||||
// RUN: %clang -### -fsave-optimization-record -x cuda -nocudainc -nocudalib %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
|
||||
// RUN: %clang -### -S -o FOO -fsave-optimization-record -foptimization-record-file=BAR.txt %s 2>&1 | FileCheck %s -check-prefix=CHECK-EQ
|
||||
|
||||
// CHECK: "-cc1"
|
||||
// CHECK: "-opt-record-file" "FOO.opt.yaml"
|
||||
|
||||
// CHECK-NO-O: "-cc1"
|
||||
// CHECK-NO-O-DAG: "-opt-record-file" "opt-record.opt.yaml"
|
||||
// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-device-cuda-nvptx64-nvidia-cuda-sm_20.opt.yaml"
|
||||
|
||||
// CHECK-EQ: "-cc1"
|
||||
// CHECK-EQ: "-opt-record-file" "BAR.txt"
|
||||
|
Loading…
Reference in New Issue