Reland "[memprof] Extend the index prof format to include memory profiles."

This reverts commit 85355a560a.

This patch adds support for optional memory profile information to be
included with an indexed profile. The indexed profile header adds a new
field which points to the offset of the memory profile section (if
present) in the indexed profile. For users who do not utilize this
feature the only overhead is a 64-bit offset in the header.

The memory profile section contains (1) profile metadata describing the
information recorded for each entry (2) an on-disk hashtable containing
the profile records indexed via llvm::md5(function_name). We chose to
introduce a separate hash table instead of the existing one since the
indexing for the instrumented fdo hash table is based on a CFG hash
which itself is perturbed by memprof instrumentation.

Differential Revision: https://reviews.llvm.org/D118653
This commit is contained in:
Snehasish Kumar 2022-02-14 11:52:42 -08:00
parent 331e8e4e27
commit 807ba7aace
19 changed files with 649 additions and 32 deletions

View File

@ -650,7 +650,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 8
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 7
#define INSTR_PROF_INDEX_VERSION 8
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 5
@ -662,6 +662,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
* The 59th bit indicates whether to use debug info to correlate profiles.
* The 60th bit indicates single byte coverage instrumentation.
* The 61st bit indicates function entry instrumentation only.
* The 62nd bit indicates whether memory profile information is present.
*/
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
@ -671,6 +672,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
#define VARIANT_MASK_MEMPROF (0x1ULL << 62)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
#define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias

View File

@ -287,7 +287,8 @@ enum class InstrProfKind {
CS = 0x8, // A context sensitive IR-level profile.
SingleByteCoverage = 0x10, // Use single byte probes for coverage.
FunctionEntryOnly = 0x20, // Only instrument the function entry basic block.
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/FunctionEntryOnly)
MemProf = 0x40, // A memory profile collected using -fmemory-profile.
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/MemProf)
};
const std::error_category &instrprof_category();
@ -1011,7 +1012,9 @@ enum ProfVersion {
Version6 = 6,
// An additional counter is added around logical operators.
Version7 = 7,
// The current version is 7.
// An additional (optional) memory profile type is added.
Version8 = 8,
// The current version is 8.
CurrentVersion = INSTR_PROF_INDEX_VERSION
};
const uint64_t Version = ProfVersion::CurrentVersion;
@ -1028,6 +1031,7 @@ struct Header {
uint64_t Unused; // Becomes unused since version 4
uint64_t HashType;
uint64_t HashOffset;
uint64_t MemProfOffset;
// New fields should only be added at the end to ensure that the size
// computation is correct. The methods below need to be updated to ensure that
// the new field is read correctly.

View File

@ -650,7 +650,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 8
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 7
#define INSTR_PROF_INDEX_VERSION 8
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 5
@ -662,6 +662,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
* The 59th bit indicates whether to use debug info to correlate profiles.
* The 60th bit indicates single byte coverage instrumentation.
* The 61st bit indicates function entry instrumentation only.
* The 62nd bit indicates whether memory profile information is present.
*/
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
@ -671,6 +672,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
#define VARIANT_MASK_MEMPROF (0x1ULL << 62)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
#define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias

View File

@ -19,6 +19,7 @@
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/LineIterator.h"
@ -471,6 +472,9 @@ struct InstrProfReaderIndexBase {
using OnDiskHashTableImplV3 =
OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
using MemProfHashTable =
OnDiskIterableChainedHashTable<memprof::MemProfRecordLookupTrait>;
template <typename HashTableImpl>
class InstrProfReaderItaniumRemapper;
@ -556,6 +560,11 @@ private:
std::unique_ptr<ProfileSummary> Summary;
/// Context sensitive profile summary data.
std::unique_ptr<ProfileSummary> CS_Summary;
/// MemProf profile schema (if available).
memprof::MemProfSchema Schema;
/// MemProf profile data on-disk indexed via llvm::md5(FunctionName).
std::unique_ptr<MemProfHashTable> MemProfTable;
// Index to the current record in the record array.
unsigned RecordIndex;
@ -609,6 +618,11 @@ public:
Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash);
/// Return the memprof records for the function identified by
/// llvm::md5(Name).
Expected<ArrayRef<memprof::MemProfRecord>>
getMemProfRecord(uint64_t FuncNameHash);
/// Fill Counts with the profile data for the given function name.
Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
std::vector<uint64_t> &Counts);

View File

@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
@ -37,6 +38,11 @@ public:
private:
bool Sparse;
StringMap<ProfilingData> FunctionData;
// A map to hold memprof data per function. The lower 64 bits obtained from
// the md5 hash of the function name are used to index into the map.
memprof::FunctionMemProfMap MemProfData;
// An enum describing the attributes of the profile.
InstrProfKind ProfileKind = InstrProfKind::Unknown;
// Use raw pointer here for the incomplete type object.
@ -57,6 +63,9 @@ public:
addRecord(std::move(I), 1, Warn);
}
void addRecord(const ::llvm::memprof::MemProfRecord &MR,
function_ref<void(Error)> Warn);
/// Merge existing function counts from the given writer.
void mergeRecordsFromWriter(InstrProfWriter &&IPW,
function_ref<void(Error)> Warn);
@ -112,6 +121,8 @@ public:
return Error::success();
}
// Returns the attributes recorded for the profile held by this writer.
InstrProfKind getProfileKind() const {
  return ProfileKind;
}
// Internal interface for testing purpose only.
void setValueProfDataEndianness(support::endianness Endianness);
void setOutputSparse(bool Sparse);

View File

@ -5,6 +5,7 @@
#include <string>
#include <vector>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/ProfileCommon.h"
@ -134,18 +135,52 @@ private:
};
struct MemProfRecord {
struct Frame {
std::string Function;
// Describes a call frame for a dynamic allocation context. The contents of
// the frame are populated by symbolizing the stack depot call frame from the
// compiler runtime.
PACKED(struct Frame {
// A uuid (uint64_t) identifying the function. It is obtained by
// llvm::md5(FunctionName) which returns the lower 64 bits.
GlobalValue::GUID Function;
// The source line offset of the call from the beginning of parent function.
uint32_t LineOffset;
// The source column number of the call to help distinguish multiple calls
// on the same line.
uint32_t Column;
// Whether the current frame is inlined.
bool IsInlineFrame;
Frame(std::string Str, uint32_t Off, uint32_t Col, bool Inline)
: Function(std::move(Str)), LineOffset(Off), Column(Col),
IsInlineFrame(Inline) {}
};
Frame(uint64_t Hash, uint32_t Off, uint32_t Col, bool Inline)
: Function(Hash), LineOffset(Off), Column(Col), IsInlineFrame(Inline) {}
bool operator==(const Frame &Other) const {
return Other.Function == Function && Other.LineOffset == LineOffset &&
Other.Column == Column && Other.IsInlineFrame == IsInlineFrame;
}
bool operator!=(const Frame &Other) const { return !operator==(Other); }
// Write the contents of the frame to the ostream \p OS.
void write(raw_ostream & OS) const {
using namespace support;
endian::Writer LE(OS, little);
// If the type of the GlobalValue::GUID changes, then we need to update
// the reader and the writer.
static_assert(std::is_same<GlobalValue::GUID, uint64_t>::value,
"Expect GUID to be uint64_t.");
LE.write<uint64_t>(Function);
LE.write<uint32_t>(LineOffset);
LE.write<uint32_t>(Column);
LE.write<bool>(IsInlineFrame);
}
});
// The dynamic calling context for the allocation.
std::vector<Frame> CallStack;
// The statistics obtained from the runtime for the allocation.
PortableMemInfoBlock Info;
void clear() {
@ -153,6 +188,12 @@ struct MemProfRecord {
Info.clear();
}
size_t serializedSize() const {
return sizeof(uint64_t) + // The number of frames to serialize.
sizeof(Frame) * CallStack.size() + // The contents of the frames.
PortableMemInfoBlock::serializedSize(); // The size of the payload.
}
// Prints out the contents of the memprof record in YAML.
void print(llvm::raw_ostream &OS) const {
OS << " Callstack:\n";
@ -168,6 +209,138 @@ struct MemProfRecord {
Info.printYAML(OS);
}
// Two records compare equal when their allocation statistics match and
// their call stacks hold the same frames in the same order.
bool operator==(const MemProfRecord &Other) const {
  // Compare the payload first.
  if (Other.Info != Info)
    return false;
  // Vector equality checks the sizes and then compares the frames pairwise
  // using Frame::operator==.
  return Other.CallStack == CallStack;
}
};
// Serializes the memprof records in \p Records to the ostream \p OS based on
// the schema provided in \p Schema.
void serializeRecords(const ArrayRef<MemProfRecord> Records,
const MemProfSchema &Schema, raw_ostream &OS);
// Deserializes memprof records from the Buffer
SmallVector<MemProfRecord, 4> deserializeRecords(const MemProfSchema &Schema,
const unsigned char *Buffer);
// Reads a memprof schema from a buffer. All entries in the buffer are
// interpreted as uint64_t. The first entry in the buffer denotes the number of
// ids in the schema. Subsequent entries are integers which map to memprof::Meta
// enum class entries. After successfully reading the schema, the pointer is one
// byte past the schema contents.
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);
using FunctionMemProfMap =
DenseMap<uint64_t, SmallVector<memprof::MemProfRecord, 4>>;
/// Reader-side trait used to look up memprof records in the on-disk hash table
/// of the indexed profile. Keys are 64-bit integers and are used unchanged as
/// their own hash value.
class MemProfRecordLookupTrait {
public:
  using data_type = ArrayRef<MemProfRecord>;
  using internal_key_type = uint64_t;
  using external_key_type = uint64_t;
  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

  // A schema is required to decode records, so there is no default
  // constructor.
  MemProfRecordLookupTrait() = delete;
  MemProfRecordLookupTrait(const MemProfSchema &S) : Schema(S) {}

  // Internal and external keys share the same representation; equality is
  // plain integer equality.
  static bool EqualKey(uint64_t A, uint64_t B) { return A == B; }
  static uint64_t GetInternalKey(uint64_t K) { return K; }
  static uint64_t GetExternalKey(uint64_t K) { return K; }

  // The key is returned as-is as the hash.
  hash_value_type ComputeHash(uint64_t K) { return K; }

  // Reads the little-endian key length followed by the data length and
  // advances \p D past both values.
  static std::pair<offset_type, offset_type>
  ReadKeyDataLength(const unsigned char *&D) {
    using namespace support;

    const offset_type KeyBytes =
        endian::readNext<offset_type, little, unaligned>(D);
    const offset_type DataBytes =
        endian::readNext<offset_type, little, unaligned>(D);
    return std::make_pair(KeyBytes, DataBytes);
  }

  // Reads the little-endian 64-bit key stored at \p D. The length argument is
  // not needed since keys have a fixed width.
  uint64_t ReadKey(const unsigned char *D, offset_type /*Unused*/) {
    using namespace support;

    const uint64_t Key = endian::readNext<external_key_type, little, unaligned>(D);
    return Key;
  }

  // Decodes the records stored at \p D using the schema held by this trait.
  // The result refers to the Records member, which is replaced on every call.
  data_type ReadData(uint64_t K, const unsigned char *D,
                     offset_type /*Unused*/) {
    Records = deserializeRecords(Schema, D);
    return Records;
  }

private:
  // Holds the memprof schema used to deserialize records.
  MemProfSchema Schema;
  // Holds the records from one function deserialized from the indexed format.
  llvm::SmallVector<MemProfRecord, 4> Records;
};
class MemProfRecordWriterTrait {
public:
using key_type = uint64_t;
using key_type_ref = uint64_t;
using data_type = ArrayRef<MemProfRecord>;
using data_type_ref = ArrayRef<MemProfRecord>;
using hash_value_type = uint64_t;
using offset_type = uint64_t;
// Pointer to the memprof schema to use for the generator. Unlike the reader
// we must use a default constructor with no params for the writer trait so we
// have a public member which must be initialized by the user.
MemProfSchema *Schema = nullptr;
MemProfRecordWriterTrait() = default;
static hash_value_type ComputeHash(key_type_ref K) { return K; }
static std::pair<offset_type, offset_type>
EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
using namespace support;
endian::Writer LE(Out, little);
offset_type N = sizeof(K);
LE.write<offset_type>(N);
offset_type M = 0;
M += sizeof(uint64_t);
for (const auto &Record : V) {
M += Record.serializedSize();
}
LE.write<offset_type>(M);
return std::make_pair(N, M);
}
void EmitKey(raw_ostream &Out, key_type_ref K, offset_type /*Unused*/) {
using namespace support;
endian::Writer LE(Out, little);
LE.write<uint64_t>(K);
}
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
offset_type /*Unused*/) {
assert(Schema != nullptr && "MemProf schema is not initialized!");
serializeRecords(V, *Schema, Out);
}
};
} // namespace memprof

View File

@ -1,5 +1,5 @@
#ifndef LLVM_PROFILEDATA_MEMPROFDATA_INC
#define LLVM_PROFILEDATA_MEMPROFDATA_INC
#ifndef MEMPROF_DATA_INC
#define MEMPROF_DATA_INC
/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

View File

@ -66,6 +66,9 @@ public:
return Iterator(this);
}
// A RawMemProfReader carries memory profile information only, so the kind it
// reports is always MemProf.
InstrProfKind getProfileKind() const {
  return InstrProfKind::MemProf;
}
// Constructor for unittests only.
RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
llvm::SmallVectorImpl<SegmentEntry> &Seg,

View File

@ -4,6 +4,7 @@ add_llvm_component_library(LLVMProfileData
InstrProfCorrelator.cpp
InstrProfReader.cpp
InstrProfWriter.cpp
MemProf.cpp
ProfileSummaryBuilder.cpp
SampleProf.cpp
SampleProfReader.cpp

View File

@ -1349,8 +1349,15 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
return make_error<InstrProfError>(instrprof_error::unsupported_version);
switch (GET_VERSION(H.formatVersion())) {
// When a new field is added in the header add a case statement here to
// populate it.
// When a new field is added in the header add a case statement here to
// populate it.
static_assert(
IndexedInstrProf::ProfVersion::CurrentVersion == Version8,
"Please update the reading code below if a new field has been added, "
"if not add a case statement to fall through to the latest version.");
case 8ull:
H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset));
LLVM_FALLTHROUGH;
default: // Version7 (when the backwards compatible header was introduced).
H.HashType = read(Buffer, offsetOf(&Header::HashType));
H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset));
@ -1361,9 +1368,15 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
size_t Header::size() const {
switch (GET_VERSION(formatVersion())) {
// When a new field is added to the header add a case statement here to
// compute the size as offset of the new field + size of the new field. This
// relies on the field being added to the end of the list.
// When a new field is added to the header add a case statement here to
// compute the size as offset of the new field + size of the new field. This
// relies on the field being added to the end of the list.
static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version8,
"Please update the size computation below if a new field has "
"been added to the header, if not add a case statement to "
"fall through to the latest version.");
case 8ull:
return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset);
default: // Version7 (when the backwards compatible header was introduced).
return offsetOf(&Header::HashOffset) + sizeof(Header::HashOffset);
}

View File

@ -19,7 +19,9 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
@ -57,6 +59,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) {
ProfileKind |= InstrProfKind::FunctionEntryOnly;
}
if (Version & VARIANT_MASK_MEMPROF) {
ProfileKind |= InstrProfKind::MemProf;
}
return ProfileKind;
}
@ -955,10 +960,35 @@ Error IndexedInstrProfReader::readHeader() {
uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
// The rest of the file is an on disk hash table.
// The hash table with profile counts comes next.
auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
Start + HashOffset, Cur, Start, HashType, Header->formatVersion());
// The MemProfOffset field in the header is only valid when the format version
// is 8 or higher (version 8 is when it was introduced).
if (GET_VERSION(Header->Version) >= 8 &&
Header->Version & VARIANT_MASK_MEMPROF) {
uint64_t MemProfOffset =
endian::byte_swap<uint64_t, little>(Header->MemProfOffset);
const unsigned char *Ptr = Start + MemProfOffset;
// The value returned from Generator.Emit.
const uint64_t TableOffset =
support::endian::readNext<uint64_t, little, unaligned>(Ptr);
// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
if (!SchemaOr)
return SchemaOr.takeError();
Schema = SchemaOr.get();
// Now initialize the table reader with a pointer into data buffer.
MemProfTable.reset(MemProfHashTable::Create(
/*Buckets=*/Start + TableOffset,
/*Payload=*/Ptr,
/*Base=*/Start, memprof::MemProfRecordLookupTrait(Schema)));
}
// Load the remapping table now if requested.
if (RemappingBuffer) {
Remapper = std::make_unique<
@ -1003,6 +1033,17 @@ IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
return error(instrprof_error::hash_mismatch);
}
/// Looks up the memprof records stored for the function whose name hashes to
/// \p FuncNameHash. Returns an error if the profile has no memprof table or if
/// the table has no entry for the hash.
Expected<ArrayRef<memprof::MemProfRecord>>
IndexedInstrProfReader::getMemProfRecord(uint64_t FuncNameHash) {
  // TODO: Add memprof specific errors.
  // MemProfTable is only created by readHeader() when the profile has the
  // memprof variant bit set, so it may be null here. Report an error instead
  // of dereferencing a null table.
  if (MemProfTable == nullptr)
    return make_error<InstrProfError>(instrprof_error::invalid_prof,
                                      "no memprof data available in profile");

  auto Iter = MemProfTable->find(FuncNameHash);
  if (Iter == MemProfTable->end())
    return make_error<InstrProfError>(instrprof_error::hash_mismatch,
                                      "memprof record not found for hash " +
                                          Twine(FuncNameHash));
  return *Iter;
}
Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
uint64_t FuncHash,
std::vector<uint64_t> &Counts) {

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
@ -63,11 +64,16 @@ public:
if (IsFDOStream) {
raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
const uint64_t LastPos = FDOStream.tell();
for (int K = 0; K < NItems; K++) {
FDOStream.seek(P[K].Pos);
for (int I = 0; I < P[K].N; I++)
write(P[K].D[I]);
}
// Reset the stream to the last position after patching so that users
// don't accidentally overwrite data. This makes it consistent with
// the string stream below which replaces the data directly.
FDOStream.seek(LastPos);
} else {
raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
std::string &Data = SOStream.str(); // with flush
@ -248,11 +254,39 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
Dest.sortValueData();
}
/// Adds the memprof record \p MR to the writer. The record is stored under the
/// Function id of the first non-inlined frame found when scanning the call
/// stack from the back. If no such frame exists, \p Warn is invoked and the
/// record is dropped.
void InstrProfWriter::addRecord(const memprof::MemProfRecord &MR,
                                function_ref<void(Error)> Warn) {
  // Use 0 as a sentinel value since it's highly unlikely that the lower 64-bits
  // of a 128 bit md5 hash will be all zeros.
  // TODO: Move this Key frame detection to the constructor to avoid having to
  // scan all the callstacks again when adding a new record.
  uint64_t Key = 0;
  for (auto Iter = MR.CallStack.rbegin(), End = MR.CallStack.rend();
       Iter != End; ++Iter) {
    if (!Iter->IsInlineFrame) {
      Key = Iter->Function;
      break;
    }
  }

  if (Key == 0) {
    Warn(make_error<InstrProfError>(
        instrprof_error::invalid_prof,
        "could not determine leaf function for memprof record."));
    // Do not store the record under the sentinel key: it has just been
    // reported as invalid, so it must not end up in the emitted table.
    return;
  }

  MemProfData[Key].push_back(MR);
}
void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
function_ref<void(Error)> Warn) {
for (auto &I : IPW.FunctionData)
for (auto &Func : I.getValue())
addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);
for (auto &I : IPW.MemProfData)
for (const auto &MR : I.second)
addRecord(MR, Warn);
}
bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
@ -297,6 +331,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
for (const auto &I : FunctionData)
if (shouldEncodeData(I.getValue()))
Generator.insert(I.getKey(), &I.getValue());
// Write the header.
IndexedInstrProf::Header Header;
Header.Magic = IndexedInstrProf::Magic;
@ -311,16 +346,18 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))
Header.Version |= VARIANT_MASK_MEMPROF;
Header.Unused = 0;
Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
Header.HashOffset = 0;
Header.MemProfOffset = 0;
int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
// Only write out all the fields except 'HashOffset'. We need
// to remember the offset of that field to allow back patching
// later.
for (int I = 0; I < N - 1; I++)
// Only write out all the fields except 'HashOffset' and 'MemProfOffset'. We
// need to remember the offset of these fields to allow back patching later.
for (int I = 0; I < N - 2; I++)
OS.write(reinterpret_cast<uint64_t *>(&Header)[I]);
// Save the location of Header.HashOffset field in \c OS.
@ -328,6 +365,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// Reserve the space for HashOffset field.
OS.write(0);
// Save the location of MemProf profile data. This is stored in two parts as
// the schema and as a separate on-disk chained hashtable.
uint64_t MemProfSectionOffset = OS.tell();
// Reserve space for the MemProf table field to be patched later if this
// profile contains memory profile information.
OS.write(0);
// Reserve space to write profile summary data.
uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
@ -347,6 +391,42 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// Write the hash table.
uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);
// Write the MemProf profile data if we have it. This includes a simple schema
// with the format described below followed by the hashtable:
// uint64_t Offset = MemProfGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfFunctionData
uint64_t MemProfSectionStart = 0;
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
MemProfSectionStart = OS.tell();
OS.write(0ULL); // Reserve space for the offset.
auto Schema = memprof::PortableMemInfoBlock::getSchema();
OS.write(static_cast<uint64_t>(Schema.size()));
for (const auto Id : Schema) {
OS.write(static_cast<uint64_t>(Id));
}
auto MemProfWriter = std::make_unique<memprof::MemProfRecordWriterTrait>();
MemProfWriter->Schema = &Schema;
OnDiskChainedHashTableGenerator<memprof::MemProfRecordWriterTrait>
MemProfGenerator;
for (const auto &I : MemProfData) {
// Insert the key (func hash) and value (vector of memprof records).
MemProfGenerator.insert(I.first, I.second);
}
uint64_t TableOffset = MemProfGenerator.Emit(OS.OS, *MemProfWriter);
PatchItem PatchItems[] = {
{MemProfSectionStart, &TableOffset, 1},
};
OS.patch(PatchItems, 1);
}
// Allocate space for data to be serialized out.
std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
IndexedInstrProf::allocSummary(SummarySize);
@ -369,6 +449,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
PatchItem PatchItems[] = {
// Patch the Header.HashOffset field.
{HashTableStartFieldOffset, &HashTableStart, 1},
// Patch the Header.MemProfOffset (=0 for profiles without MemProf data).
{MemProfSectionOffset, &MemProfSectionStart, 1},
// Patch the summary data.
{SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
(int)(SummarySize / sizeof(uint64_t))},

View File

@ -0,0 +1,73 @@
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
namespace llvm {
namespace memprof {
void serializeRecords(const ArrayRef<MemProfRecord> Records,
const MemProfSchema &Schema, raw_ostream &OS) {
using namespace support;
endian::Writer LE(OS, little);
LE.write<uint64_t>(Records.size());
for (const MemProfRecord &MR : Records) {
LE.write<uint64_t>(MR.CallStack.size());
for (const MemProfRecord::Frame &F : MR.CallStack) {
F.write(OS);
}
MR.Info.serialize(Schema, OS);
}
}
// Reads back the records produced by serializeRecords from \p Ptr. The buffer
// holds the number of records, then for each record the number of frames,
// every frame, and the info block encoded according to \p Schema.
SmallVector<MemProfRecord, 4> deserializeRecords(const MemProfSchema &Schema,
                                                 const unsigned char *Ptr) {
  using namespace support;

  SmallVector<MemProfRecord, 4> Records;
  const uint64_t NumRecords =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  for (uint64_t I = 0; I < NumRecords; I++) {
    MemProfRecord MR;
    const uint64_t NumFrames =
        endian::readNext<uint64_t, little, unaligned>(Ptr);
    for (uint64_t J = 0; J < NumFrames; J++) {
      // Decode each field with a little-endian read, in the same order and
      // with the same widths as Frame::write. Copying the raw struct bytes
      // would yield byte-swapped values on big-endian hosts.
      const uint64_t Function =
          endian::readNext<uint64_t, little, unaligned>(Ptr);
      const uint32_t LineOffset =
          endian::readNext<uint32_t, little, unaligned>(Ptr);
      const uint32_t Column =
          endian::readNext<uint32_t, little, unaligned>(Ptr);
      const bool IsInlineFrame =
          endian::readNext<bool, little, unaligned>(Ptr);
      MR.CallStack.emplace_back(Function, LineOffset, Column, IsInlineFrame);
    }
    MR.Info.deserialize(Schema, Ptr);
    // deserialize() takes the pointer by value here, so step over the info
    // block explicitly.
    Ptr += PortableMemInfoBlock::serializedSize();
    Records.push_back(MR);
  }
  return Records;
}
// Parses a memprof schema starting at \p Buffer: a little-endian uint64_t
// count followed by that many uint64_t tags. Returns an error if the count
// exceeds Meta::Size or if any tag is not below Meta::Size. \p Buffer is only
// advanced when the whole schema was read successfully.
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
  using namespace support;

  // Read through a local cursor so that Buffer is untouched on failure.
  const unsigned char *Cursor = Buffer;
  const uint64_t Limit = static_cast<uint64_t>(Meta::Size);

  const uint64_t Count = endian::readNext<uint64_t, little, unaligned>(Cursor);
  if (Count > Limit)
    return make_error<InstrProfError>(instrprof_error::malformed,
                                      "memprof schema invalid");

  MemProfSchema Result;
  for (size_t I = 0; I < Count; I++) {
    const uint64_t Tag = endian::readNext<uint64_t, little, unaligned>(Cursor);
    if (Tag >= Limit)
      return make_error<InstrProfError>(instrprof_error::malformed,
                                        "memprof schema invalid");
    Result.push_back(static_cast<Meta>(Tag));
  }

  // Advance the buffer to one past the schema if we succeeded.
  Buffer = Cursor;
  return Result;
}
} // namespace memprof
} // namespace llvm

View File

@ -362,7 +362,12 @@ Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
for (size_t I = 0; I < DI.getNumberOfFrames(); I++) {
const auto &Frame = DI.getFrame(I);
Record.CallStack.emplace_back(
std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))),
// We use the function guid which we expect to be a uint64_t. At this
// time, it is the lower 64 bits of the md5 of the function name. Any
// suffix with .llvm. is trimmed since these are added by thinLTO
// global promotion. At the time the profile is consumed, these
// suffixes will not be present.
Function::getGUID(trimSuffix(Frame.FunctionName)),
Frame.Line - Frame.StartLine, Frame.Column,
// Only the first entry is not an inlined location.
I != 0);

Binary file not shown.

View File

@ -0,0 +1,47 @@
REQUIRES: x86_64-linux
The input memprof and instrumented raw profiles were generated from the following source code:
```
#include <stdlib.h>
#include <string.h>
int main(int argc, char **argv) {
char *x = (char *)malloc(10);
memset(x, 0, 10);
free(x);
x = (char *)malloc(10);
memset(x, 0, 10);
free(x);
return 0;
}
```
Steps to collect the memprof raw profile and the instrprof raw profile:
```
# Collect instrprof profile with name compression disabled since some buildbots
# do not have zlib.
clang -mllvm -enable-name-compression=false -fprofile-generate source.c -o instr.out
./instr.out
mv *.profraw basic.profraw
# Collect memprof profile.
clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \
-fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \
-fno-optimize-sibling-calls -m64 -Wl,-build-id source.c -o basic.memprofexe
env MEMPROF_OPTIONS=log_path=stdout ./basic.memprofexe > basic.memprofraw
```
RUN: llvm-profdata merge %p/Inputs/basic.profraw %p/Inputs/basic.memprofraw --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof
RUN: llvm-profdata show %t.prof | FileCheck %s
For now we only check the validity of the instrumented profile since we don't
have a way to display the contents of the memprof indexed format yet.
CHECK: Instrumentation level: IR entry_first = 0
CHECK: Total functions: 1
CHECK: Maximum function count: 1
CHECK: Maximum internal block count: 0

View File

@ -239,7 +239,7 @@ static void overlapInput(const std::string &BaseFilename,
/// Load an input into a writer context.
static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
const InstrProfCorrelator *Correlator,
WriterContext *WC) {
const StringRef ProfiledBinary, WriterContext *WC) {
std::unique_lock<std::mutex> CtxGuard{WC->Lock};
// Copy the filename, because llvm::ThreadPool copied the input "const
@ -247,6 +247,35 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
// invalid outside of this packaged task.
std::string Filename = Input.Filename;
using ::llvm::memprof::RawMemProfReader;
if (RawMemProfReader::hasFormat(Input.Filename)) {
auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
if (!ReaderOrErr) {
exitWithError(ReaderOrErr.takeError(), Input.Filename);
}
std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
// Check if the profile types can be merged, e.g. clang frontend profiles
// should not be merged with memprof profiles.
if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
consumeError(std::move(E));
WC->Errors.emplace_back(
make_error<StringError>(
"Cannot merge MemProf profile with Clang generated profile.",
std::error_code()),
Filename);
return;
}
// Add the records into the writer context.
for (const memprof::MemProfRecord &MR : *Reader) {
WC->Writer.addRecord(MR, [&](Error E) {
instrprof_error IPE = InstrProfError::take(std::move(E));
WC->Errors.emplace_back(make_error<InstrProfError>(IPE), Filename);
});
}
return;
}
auto ReaderOrErr = InstrProfReader::create(Input.Filename, Correlator);
if (Error E = ReaderOrErr.takeError()) {
// Skip the empty profiles by returning silently.
@ -332,7 +361,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
SymbolRemapper *Remapper,
StringRef OutputFilename,
ProfileFormat OutputFormat, bool OutputSparse,
unsigned NumThreads, FailureMode FailMode) {
unsigned NumThreads, FailureMode FailMode,
const StringRef ProfiledBinary) {
if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary &&
OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text)
exitWithError("unknown format is specified");
@ -365,14 +395,15 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
if (NumThreads == 1) {
for (const auto &Input : Inputs)
loadInput(Input, Remapper, Correlator.get(), Contexts[0].get());
loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
Contexts[0].get());
} else {
ThreadPool Pool(hardware_concurrency(NumThreads));
// Load the inputs in parallel (N/NumThreads serial steps).
unsigned Ctx = 0;
for (const auto &Input : Inputs) {
Pool.async(loadInput, Input, Remapper, Correlator.get(),
Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
Contexts[Ctx].get());
Ctx = (Ctx + 1) % NumThreads;
}
@ -589,7 +620,7 @@ static void supplementInstrProfile(
SmallSet<instrprof_error, 4> WriterErrorCodes;
auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
WriterErrorCodes);
loadInput(Inputs[0], nullptr, nullptr, WC.get());
loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get());
if (WC->Errors.size() > 0)
exitWithError(std::move(WC->Errors[0].first), InstrFilename);
@ -969,6 +1000,9 @@ static int merge_main(int argc, const char *argv[]) {
cl::opt<std::string> DebugInfoFilename(
"debug-info", cl::init(""),
cl::desc("Use the provided debug info to correlate the raw profile."));
cl::opt<std::string> ProfiledBinary(
"profiled-binary", cl::init(""),
cl::desc("Path to binary from which the profile was collected."));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
@ -1011,7 +1045,7 @@ static int merge_main(int argc, const char *argv[]) {
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(),
OutputFilename, OutputFormat, OutputSparse, NumThreads,
FailureMode);
FailureMode, ProfiledBinary);
else
mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
OutputFormat, ProfileSymbolListFile, CompressAllSections,
@ -1042,7 +1076,7 @@ static void overlapInstrProfile(const std::string &BaseFilename,
OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
exit(0);
}
loadInput(WeightedInput, nullptr, nullptr, &Context);
loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context);
overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
IsCS);
Overlap.dump(OS);

View File

@ -12,6 +12,7 @@
#include "llvm/IR/Module.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Compression.h"
#include "llvm/Testing/Support/Error.h"
#include "llvm/Testing/Support/SupportHelpers.h"
@ -221,6 +222,67 @@ TEST_F(InstrProfTest, test_writer_merge) {
ASSERT_EQ(0U, R->Counts[1]);
}
// Round-trips a single MemProf record through the writer and reader: the
// record is added, the profile is serialized and read back, and the record is
// then looked up under 0x123 (the first field of its first frame).
TEST_F(InstrProfTest, test_memprof) {
  ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
                    Succeeded());

  // Two-frame call stack; only the second frame has its last (boolean) field
  // set.
  llvm::memprof::MemProfRecord WantRecord;
  WantRecord.CallStack.push_back({0x123, 1, 2, false});
  WantRecord.CallStack.push_back({0x345, 3, 4, true});
  Writer.addRecord(WantRecord, Err);

  // Serialize and immediately hand the buffer to the reader.
  readProfile(Writer.writeBuffer());

  auto GotOrErr = Reader->getMemProfRecord(0x123);
  ASSERT_THAT_ERROR(GotOrErr.takeError(), Succeeded());
  const auto GotRecords = GotOrErr.get();
  ASSERT_EQ(GotRecords.size(), 1U);
  EXPECT_EQ(GotRecords[0], WantRecord);
}
// Merges a writer holding only a MemProf record into a writer holding only an
// instrumentation record, then checks that both records can be read back from
// the combined profile.
TEST_F(InstrProfTest, test_memprof_merge) {
  // The fixture's writer starts with a plain instrumentation record.
  Writer.addRecord({"func1", 0x1234, {42}}, Err);

  // A second writer carries the MemProf record.
  InstrProfWriter Writer2;
  ASSERT_THAT_ERROR(Writer2.mergeProfileKind(InstrProfKind::MemProf),
                    Succeeded());

  llvm::memprof::MemProfRecord MR;
  MR.CallStack.push_back({0x123, 1, 2, false});
  MR.CallStack.push_back({0x345, 3, 4, true});
  Writer2.addRecord(MR, Err);

  // The profile kinds must be merged before the records are.
  ASSERT_THAT_ERROR(Writer.mergeProfileKind(Writer2.getProfileKind()),
                    Succeeded());
  Writer.mergeRecordsFromWriter(std::move(Writer2), Err);

  auto Profile = Writer.writeBuffer();
  readProfile(std::move(Profile));

  Expected<InstrProfRecord> R = Reader->getInstrProfRecord("func1", 0x1234);
  // Must be fatal: R is dereferenced right below, which is only valid when
  // the lookup succeeded.
  ASSERT_THAT_ERROR(R.takeError(), Succeeded());
  ASSERT_EQ(1U, R->Counts.size());
  ASSERT_EQ(42U, R->Counts[0]);

  auto RecordsOr = Reader->getMemProfRecord(0x123);
  ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded());
  const auto Records = RecordsOr.get();
  ASSERT_EQ(Records.size(), 1U);
  EXPECT_EQ(Records[0], MR);
}
// Adds a MemProf record that has no non-inline frame and checks that any
// error handed to the callback is instrprof_error::invalid_prof.
TEST_F(InstrProfTest, test_memprof_invalid_add_record) {
  // At least one of the frames should be a non-inline frame; here both are
  // marked inline, which makes the record invalid.
  llvm::memprof::MemProfRecord AllInline;
  AllInline.CallStack.push_back({0x123, 1, 2, true});
  AllInline.CallStack.push_back({0x345, 3, 4, true});

  Writer.addRecord(AllInline, [](Error &&E) {
    EXPECT_TRUE(ErrorEquals(instrprof_error::invalid_prof, std::move(E)));
  });
}
static const char callee1[] = "callee1";
static const char callee2[] = "callee2";
static const char callee3[] = "callee3";

View File

@ -89,8 +89,8 @@ const DILineInfoSpecifier specifier() {
DILineInfoSpecifier::FunctionNameKind::LinkageName);
}
MATCHER_P4(FrameContains, Function, LineOffset, Column, Inline, "") {
const std::string ExpectedHash = std::to_string(llvm::MD5Hash(Function));
MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") {
const uint64_t ExpectedHash = llvm::Function::getGUID(FunctionName);
if (arg.Function != ExpectedHash) {
*result_listener << "Hash mismatch";
return false;
@ -103,6 +103,22 @@ MATCHER_P4(FrameContains, Function, LineOffset, Column, Inline, "") {
return false;
}
// Matches a record that compares equal (operator==) to Want. On a mismatch,
// both the wanted and the actual record are printed to the result listener
// via their print() methods.
MATCHER_P(EqualsRecord, Want, "") {
  const bool IsEqual = (arg == Want);
  if (!IsEqual) {
    // Render both records into one string for the listener.
    std::string Diff;
    llvm::raw_string_ostream DiffOS(Diff);
    DiffOS << "\n Want: \n";
    Want.print(DiffOS);
    DiffOS << "\n Got: \n";
    arg.print(DiffOS);
    DiffOS.flush();
    *result_listener << Diff;
  }
  return IsEqual;
}
MemProfSchema getFullSchema() {
MemProfSchema Schema;
#define MIBEntryDef(NameTag, Name, Type) Schema.push_back(Meta::Name);
@ -184,4 +200,38 @@ TEST(MemProf, PortableWrapper) {
EXPECT_EQ(3UL, ReadBlock.getAllocCpuId());
}
// Serializes two MemProf records with the full schema into a string buffer,
// deserializes them again, and checks that the same records come back in the
// same order.
TEST(MemProf, RecordSerializationRoundTrip) {
  const MemProfSchema Schema = getFullSchema();

  llvm::SmallVector<MemProfRecord, 3> Records;
  MemProfRecord MR;

  MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
                    /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
                    /*dealloc_cpu=*/4);

  // First record.
  MR.Info = PortableMemInfoBlock(Info);
  MR.CallStack.push_back({0x123, 1, 2, false});
  MR.CallStack.push_back({0x345, 3, 4, false});
  Records.push_back(MR);

  // Second record: reuse MR after clearing it; same info block, different
  // call stack.
  MR.clear();
  MR.Info = PortableMemInfoBlock(Info);
  MR.CallStack.push_back({0x567, 5, 6, false});
  MR.CallStack.push_back({0x789, 7, 8, false});
  Records.push_back(MR);

  std::string Buffer;
  llvm::raw_string_ostream OS(Buffer);
  serializeRecords(Records, Schema, OS);
  OS.flush();

  const llvm::SmallVector<MemProfRecord, 4> GotRecords = deserializeRecords(
      Schema, reinterpret_cast<const unsigned char *>(Buffer.data()));

  // The size check must be fatal: the element-wise comparison below indexes
  // GotRecords and would read out of bounds if fewer records came back.
  ASSERT_EQ(GotRecords.size(), Records.size());
  for (size_t I = 0, E = Records.size(); I != E; ++I)
    EXPECT_THAT(GotRecords[I], EqualsRecord(Records[I]));
}
} // namespace