Retry "[llvm-profdata] Add option to ingest filepaths from a file"

Changes since the initial commit:
- Normalize file paths read from the file to prevent Windows path
  separators from escaping parts of the path.
- Since we need to store the normalized file paths in WeightedFile,
  don't do tricky things to keep the source MemoryBuffer alive.

Differential Revision: http://reviews.llvm.org/D20980

llvm-svn: 271949
This commit is contained in:
Vedant Kumar 2016-06-06 22:39:22 +00:00
parent 71bb79430f
commit d8ee75b8f5
3 changed files with 84 additions and 14 deletions

View File

@ -44,6 +44,9 @@ interpreted as relatively more important than a shorter run. Depending on the
nature of the training runs it may be useful to adjust the weight given to each
input file by using the ``-weighted-input`` option.
Profiles passed in via ``-weighted-input``, ``-input-files``, or via positional
arguments are processed once for each time they are seen, so a profile that is
listed more than once is merged more than once.
OPTIONS OPTIONS
^^^^^^^ ^^^^^^^
@ -65,6 +68,12 @@ OPTIONS
Input files specified without using this option are assigned a default
weight of 1. Examples are shown below.
.. option:: -input-files=path, -f=path
Specify a file which contains a list of files to merge. The entries in this
file are newline-separated. Lines starting with '#' are skipped. Entries may
be of the form <filename> or <weight>,<filename>.
.. option:: -instr (default) .. option:: -instr (default)
Specify that the input profile is an instrumentation-based profile. Specify that the input profile is an instrumentation-based profile.

View File

@ -0,0 +1,16 @@
RUN: printf '# comment 1\n' > %t
RUN: printf ' # comment 2\n' >> %t
RUN: printf 'bar\n' >> %t
RUN: printf ' baz\n' >> %t
RUN: printf '2,%t.weighted\n' >> %t
RUN: printf ' ' > %t.weighted
RUN: llvm-profdata merge -input-files %t -dump-input-file-list foo -o /dev/null | FileCheck %s
RUN: llvm-profdata merge -f %t -dump-input-file-list foo -o /dev/null | FileCheck %s
CHECK: 1,foo
CHECK-NEXT: 1,bar
CHECK-NEXT: 1,baz
CHECK-NEXT: 2,{{.*}}.weighted

View File

@ -108,12 +108,12 @@ static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
} }
// NOTE(review): side-by-side diff rendering -- each line below shows the
// pre-change text followed by the post-change text.
//
// An input profile path paired with the weight given to it.
// The change switches Filename from a non-owning StringRef to an owning
// std::string so that a normalized path can be stored in the struct itself.
struct WeightedFile { struct WeightedFile {
StringRef Filename; std::string Filename;
uint64_t Weight; uint64_t Weight;
// The default constructor does not assign Weight.
WeightedFile() {} WeightedFile() {}
WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {} WeightedFile(std::string F, uint64_t W) : Filename{F}, Weight{W} {}
}; };
// List of weighted inputs with inline storage for five entries.
typedef SmallVector<WeightedFile, 5> WeightedFileVector; typedef SmallVector<WeightedFile, 5> WeightedFileVector;
@ -209,18 +209,47 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
} }
// NOTE(review): side-by-side diff rendering -- the pre-change and post-change
// bodies are interleaved line by line; from the existence check onward the two
// columns no longer line up.
//
// Parse a "<weight>,<filename>" string into a WeightedFile.
// Exits with an error when the weight is not an integer >= 1 or when the
// named file does not exist.
static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) { static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
StringRef WeightStr, FileName; StringRef WeightStr, FilenameStr;
// Text before the ',' is the weight; the remainder is the file name.
std::tie(WeightStr, FileName) = WeightedFilename.split(','); std::tie(WeightStr, FilenameStr) = WeightedFilename.split(',');
uint64_t Weight; uint64_t Weight;
// Base-10 parse; a parse failure or a value below 1 is rejected.
if (WeightStr.getAsInteger(10, Weight) || Weight < 1) if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
exitWithError("Input weight must be a positive integer."); exitWithError("Input weight must be a positive integer.");
// Post-change: the path is first passed through sys::path::native into
// CanonicalFilename, and that result is what gets checked and stored.
if (!sys::fs::exists(FileName)) SmallString<256> CanonicalFilename;
exitWithErrorCode(make_error_code(errc::no_such_file_or_directory), sys::path::native(FilenameStr, CanonicalFilename);
FileName);
return WeightedFile(FileName, Weight); if (!sys::fs::exists(CanonicalFilename))
exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
CanonicalFilename);
// The result holds its own copy of the path (via .str()), so it does not
// refer to the local CanonicalFilename buffer.
return WeightedFile(StringRef(CanonicalFilename).str(), Weight);
}
static void parseInputFilenamesFile(const StringRef &InputFilenamesFile,
WeightedFileVector &WFV) {
if (InputFilenamesFile == "")
return;
auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile);
if (!BufOrError)
exitWithErrorCode(BufOrError.getError(), InputFilenamesFile);
auto Buffer = std::move(*BufOrError);
StringRef Data = Buffer->getBuffer();
SmallVector<StringRef, 8> Entries;
Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
for (const StringRef &FileWeightEntry : Entries) {
StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
// Skip comments.
if (SanitizedEntry.startswith("#"))
continue;
// If there's no comma, it's an unweighted profile.
else if (SanitizedEntry.rfind(',') == StringRef::npos)
WFV.emplace_back(SanitizedEntry, 1);
else
WFV.emplace_back(parseWeightedFile(SanitizedEntry));
}
} }
static int merge_main(int argc, const char *argv[]) { static int merge_main(int argc, const char *argv[]) {
@ -228,6 +257,15 @@ static int merge_main(int argc, const char *argv[]) {
cl::desc("<filename...>")); cl::desc("<filename...>"));
cl::list<std::string> WeightedInputFilenames("weighted-input", cl::list<std::string> WeightedInputFilenames("weighted-input",
cl::desc("<weight>,<filename>")); cl::desc("<weight>,<filename>"));
cl::opt<std::string> InputFilenamesFile(
"input-files", cl::init(""),
cl::desc("Path to file containing newline-separated "
"[<weight>,]<filename> entries"));
cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
cl::aliasopt(InputFilenamesFile));
cl::opt<bool> DumpInputFileList(
"dump-input-file-list", cl::init(false), cl::Hidden,
cl::desc("Dump the list of input files and their weights, then exit"));
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"), cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::init("-"), cl::Required, cl::init("-"), cl::Required,
cl::desc("Output file")); cl::desc("Output file"));
@ -249,15 +287,22 @@ static int merge_main(int argc, const char *argv[]) {
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
if (InputFilenames.empty() && WeightedInputFilenames.empty()) WeightedFileVector WeightedInputs;
for (StringRef Filename : InputFilenames)
WeightedInputs.emplace_back(Filename, 1);
for (StringRef WeightedFilename : WeightedInputFilenames)
WeightedInputs.emplace_back(parseWeightedFile(WeightedFilename));
parseInputFilenamesFile(InputFilenamesFile, WeightedInputs);
if (WeightedInputs.empty())
exitWithError("No input files specified. See " + exitWithError("No input files specified. See " +
sys::path::filename(argv[0]) + " -help"); sys::path::filename(argv[0]) + " -help");
WeightedFileVector WeightedInputs; if (DumpInputFileList) {
for (StringRef Filename : InputFilenames) for (auto &WF : WeightedInputs)
WeightedInputs.push_back(WeightedFile(Filename, 1)); outs() << WF.Weight << "," << WF.Filename << "\n";
for (StringRef WeightedFilename : WeightedInputFilenames) return 0;
WeightedInputs.push_back(parseWeightedFile(WeightedFilename)); }
if (ProfileKind == instr) if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat, mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,