Retry "[llvm-profdata] Add option to ingest filepaths from a file"

Changes since the initial commit:
- Normalize file paths read from the file to prevent Windows path
  separators from escaping parts of the path.
- Since we need to store the normalized file paths in WeightedFile,
  don't do tricky things to keep the source MemoryBuffer alive.

Differential Revision: http://reviews.llvm.org/D20980

llvm-svn: 271949
This commit is contained in:
Vedant Kumar 2016-06-06 22:39:22 +00:00
parent 71bb79430f
commit d8ee75b8f5
3 changed files with 84 additions and 14 deletions

View File

@ -44,6 +44,9 @@ interpreted as relatively more important than a shorter run. Depending on the
nature of the training runs it may be useful to adjust the weight given to each
input file by using the ``-weighted-input`` option.
Profiles passed in via ``-weighted-input``, ``-input-files``, or via positional
arguments are processed once for each time they are seen.
OPTIONS
^^^^^^^
@ -65,6 +68,12 @@ OPTIONS
Input files specified without using this option are assigned a default
weight of 1. Examples are shown below.
.. option:: -input-files=path, -f=path
Specify a file which contains a list of files to merge. The entries in this
file are newline-separated. Lines starting with '#' are skipped. Entries may
be of the form <filename> or <weight>,<filename>.
.. option:: -instr (default)
Specify that the input profile is an instrumentation-based profile.

View File

@ -0,0 +1,16 @@
RUN: printf '# comment 1\n' > %t
RUN: printf ' # comment 2\n' >> %t
RUN: printf 'bar\n' >> %t
RUN: printf ' baz\n' >> %t
RUN: printf '2,%t.weighted\n' >> %t
RUN: printf ' ' > %t.weighted
RUN: llvm-profdata merge -input-files %t -dump-input-file-list foo -o /dev/null | FileCheck %s
RUN: llvm-profdata merge -f %t -dump-input-file-list foo -o /dev/null | FileCheck %s
CHECK: 1,foo
CHECK-NEXT: 1,bar
CHECK-NEXT: 1,baz
CHECK-NEXT: 2,{{.*}}.weighted

View File

@ -108,12 +108,12 @@ static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
}
struct WeightedFile {
StringRef Filename;
std::string Filename;
uint64_t Weight;
WeightedFile() {}
WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {}
WeightedFile(std::string F, uint64_t W) : Filename{F}, Weight{W} {}
};
typedef SmallVector<WeightedFile, 5> WeightedFileVector;
@ -209,18 +209,47 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
}
static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
StringRef WeightStr, FileName;
std::tie(WeightStr, FileName) = WeightedFilename.split(',');
StringRef WeightStr, FilenameStr;
std::tie(WeightStr, FilenameStr) = WeightedFilename.split(',');
uint64_t Weight;
if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
exitWithError("Input weight must be a positive integer.");
if (!sys::fs::exists(FileName))
exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
FileName);
SmallString<256> CanonicalFilename;
sys::path::native(FilenameStr, CanonicalFilename);
return WeightedFile(FileName, Weight);
if (!sys::fs::exists(CanonicalFilename))
exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
CanonicalFilename);
return WeightedFile(StringRef(CanonicalFilename).str(), Weight);
}
static void parseInputFilenamesFile(const StringRef &InputFilenamesFile,
WeightedFileVector &WFV) {
if (InputFilenamesFile == "")
return;
auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFilenamesFile);
if (!BufOrError)
exitWithErrorCode(BufOrError.getError(), InputFilenamesFile);
auto Buffer = std::move(*BufOrError);
StringRef Data = Buffer->getBuffer();
SmallVector<StringRef, 8> Entries;
Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
for (const StringRef &FileWeightEntry : Entries) {
StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
// Skip comments.
if (SanitizedEntry.startswith("#"))
continue;
// If there's no comma, it's an unweighted profile.
else if (SanitizedEntry.rfind(',') == StringRef::npos)
WFV.emplace_back(SanitizedEntry, 1);
else
WFV.emplace_back(parseWeightedFile(SanitizedEntry));
}
}
static int merge_main(int argc, const char *argv[]) {
@ -228,6 +257,15 @@ static int merge_main(int argc, const char *argv[]) {
cl::desc("<filename...>"));
cl::list<std::string> WeightedInputFilenames("weighted-input",
cl::desc("<weight>,<filename>"));
cl::opt<std::string> InputFilenamesFile(
"input-files", cl::init(""),
cl::desc("Path to file containing newline-separated "
"[<weight>,]<filename> entries"));
cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
cl::aliasopt(InputFilenamesFile));
cl::opt<bool> DumpInputFileList(
"dump-input-file-list", cl::init(false), cl::Hidden,
cl::desc("Dump the list of input files and their weights, then exit"));
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::init("-"), cl::Required,
cl::desc("Output file"));
@ -249,15 +287,22 @@ static int merge_main(int argc, const char *argv[]) {
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
if (InputFilenames.empty() && WeightedInputFilenames.empty())
WeightedFileVector WeightedInputs;
for (StringRef Filename : InputFilenames)
WeightedInputs.emplace_back(Filename, 1);
for (StringRef WeightedFilename : WeightedInputFilenames)
WeightedInputs.emplace_back(parseWeightedFile(WeightedFilename));
parseInputFilenamesFile(InputFilenamesFile, WeightedInputs);
if (WeightedInputs.empty())
exitWithError("No input files specified. See " +
sys::path::filename(argv[0]) + " -help");
WeightedFileVector WeightedInputs;
for (StringRef Filename : InputFilenames)
WeightedInputs.push_back(WeightedFile(Filename, 1));
for (StringRef WeightedFilename : WeightedInputFilenames)
WeightedInputs.push_back(parseWeightedFile(WeightedFilename));
if (DumpInputFileList) {
for (auto &WF : WeightedInputs)
outs() << WF.Weight << "," << WF.Filename << "\n";
return 0;
}
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,